/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);
static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);
static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *,
						   tree *);
struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};
/* Note that ordering matters - the first pattern matching on a stmt
   is taken which means usually the more complex one needs to precede
   the less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
      { vect_recog_widen_mult_pattern, "widen_mult" },
      { vect_recog_dot_prod_pattern, "dot_prod" },
      { vect_recog_sad_pattern, "sad" },
      { vect_recog_widen_sum_pattern, "widen_sum" },
      { vect_recog_pow_pattern, "pow" },
      { vect_recog_widen_shift_pattern, "widen_shift" },
      { vect_recog_over_widening_pattern, "over_widening" },
      { vect_recog_rotate_pattern, "rotate" },
      { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
      { vect_recog_divmod_pattern, "divmod" },
      { vect_recog_mult_pattern, "mult" },
      { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
      { vect_recog_bool_pattern, "bool" },
      { vect_recog_mask_conversion_pattern, "mask_conversion" }
};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
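
/* As an illustrative example (an assumed user-level loop supplied for
   exposition, not part of the original sources), the recognizer fires on
   source code such as:

     signed char X[N], Y[N];
     short prod;
     int sum = 0;
     for (int i = 0; i < N; i++)
       {
	 prod = X[i] * Y[i];
	 sum += prod;
       }

   with 'type' = signed char, 'TYPE1' = short and 'TYPE2' = int, so the
   whole loop body collapses into a single DOT_PROD_EXPR per vector
   iteration on targets that provide a dot-product optab.  */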
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
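
/* Illustrative user-level source for this pattern (an assumed example, not
   taken from this file):

     unsigned char X[N], Y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs (X[i] - Y[i]);

   X[i] and Y[i] are promoted before the subtraction and the absolute
   differences are accumulated; targets with a SAD instruction (e.g. x86
   psadbw) can implement the whole body as one SAD_EXPR.  */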
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
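
/* Worked example (an assumption for exposition, not taken from this file):
   with a_t of type char and a result type of int,

     a_T = (int) a_t;
     res = a_T * 300;

   cannot use char as HALF_TYPE because 300 does not fit char, but int is
   4x the size of char and 300 fits short, so the promotion is rewritten
   as a_it = (short) a_t and the multiplication is widened from short to
   int instead.  */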
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
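
/* Illustrative source (an assumed example): the classic widening multiply
   loop is

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   The C front end widens both operands to int before the multiply (S3/S4
   above); this recognizer folds the conversions and the multiply back into
   a single WIDEN_MULT <a_t, b_t>, matching widening-multiply instructions
   on targets that provide them.  */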
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree *oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
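
/* For illustration (assumed examples, not taken from this file), both of

     float  f (float y)  { return powf (y, 2.0f); }
     double g (double y) { return pow (y, 0.5); }

   qualify: the first is rewritten as y * y, the second as a sqrt call,
   provided the target can vectorize sqrt.  */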
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  return stmt;
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
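
/* Illustrative source (an assumed example):

     unsigned short x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   Each x[i] is widened before the add; the recognizer turns the widening
   conversion plus the add into a single WIDEN_SUM <x_t, sum_0>.  */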
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
      && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
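
/* Illustrative source (an assumed example):

     unsigned char in[N], out[N];
     for (int i = 0; i < N; i++)
       out[i] = (in[i] << 2) | 3;

   The C front end performs the shift and ior in int, but the final store
   truncates back to char; this recognizer re-does the operations on a
   narrower type so that more elements fit into each vector.  */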
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
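
/* Illustrative source (an assumed example):

     short in[N];
     int out[N];
     for (int i = 0; i < N; i++)
       out[i] = in[i] << 8;

   The operand is widened from short to int before the shift; the
   recognizer emits WIDEN_LSHIFT_EXPR <in_t, 8> instead, keeping the
   input in the narrow vector type.  */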
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt
    = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
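
/* Illustrative source (an assumed example):

     unsigned char a[N];
     for (int i = 0; i < N; i++)
       a[i] = (a[i] << 3) | (a[i] >> 5);

   The middle end folds the body to a rotate a[i] r<< 3; on targets that
   have vector shifts but no vector rotate, this recognizer expands the
   rotate back into the S1-S4 shift/ior sequence above so the loop can
   still be vectorized.  */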
static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't otherwise be
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
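
/* Roughly illustrative source (an assumed example):

     unsigned short dst[N];
     unsigned char src[N];
     for (int i = 0; i < N; i++)
       dst[i] = dst[i] >> src[i];

   The shift amount has a narrower type than the shifted value; the
   recognizer inserts a cast (or a mask of the low bits when the existing
   cast would truncate) so that both vector operands end up with the same
   mode.  */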

static gimple *
vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
                                        tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  tree def = NULL_TREE;
  stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
  if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        {
          if (TYPE_PRECISION (TREE_TYPE (oprnd1))
              >= TYPE_PRECISION (TREE_TYPE (rhs1)))
            def = rhs1;
          else
            {
              tree mask
                = build_low_bits_mask (TREE_TYPE (rhs1),
                                       TYPE_PRECISION (TREE_TYPE (oprnd1)));
              def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
              def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
              new_pattern_def_seq (stmt_vinfo, def_stmt);
            }
        }
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
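
/* Illustration: for
     char a_t;  int b_T, res_T;
     res_T = b_T << (int) a_t;
   the pattern emits def = (int) a_t (or, when the source of an existing
   cast can be reused, either that source directly or a BIT_AND with a
   low-bits mask) into the def sequence, and returns res_T = b_T << def,
   so both shift operands end up with the same vector type.  */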

/* Detect multiplication by constants that are positive or negative powers
   of 2, and convert them to shift patterns.

   Mult with constants that are positive powers of two:

   type a_t;
   TYPE a_T, res_T;

   S1: res_T = a_T * CONST;

   or

   Mult with constants that are negative powers of two:

   S2: res_T = a_T * -CONST;

   Input/Output:

   STMTS: Contains a stmt from which the pattern search begins,
   i.e. the mult stmt.  Convert the mult operation to LSHIFT if
   the constant operand is a power of 2:

   S1': b_t = a_t << log2 (n)

   Convert the mult operation to LSHIFT followed by a NEGATE
   if the constant operand is a negative power of 2:

   type a_t, b_t, res_T;
   S2': b_t = a_t << log2 (n)
   S3': res_T = - (b_t)

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the multiplication
     S1 or S2 stmt.  */

static gimple *
vect_recog_mult_pattern (vec<gimple *> *stmts,
                         tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype;
  gimple *pattern_stmt, *def_stmt;
  optab optab;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  int power2_val, power2_neg_val;
  tree shift;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);

  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || !INTEGRAL_TYPE_P (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized multiplication natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  /* If target cannot handle vector left shift then we cannot
     optimize and bail out.  */
  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    return NULL;

  power2_val = wi::exact_log2 (oprnd1);
  power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));

  /* Handle constant operands that are positive or negative powers of 2.  */
  if (power2_val != -1)
    {
      shift = build_int_cst (itype, power2_val);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               LSHIFT_EXPR, oprnd0, shift);
    }
  else if (power2_neg_val != -1)
    {
      /* If the target cannot handle vector NEGATE then we cannot
         do the optimization.  */
      optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
      if (!optab
          || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;

      shift = build_int_cst (itype, power2_neg_val);
      def_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               LSHIFT_EXPR, oprnd0, shift);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               NEGATE_EXPR, gimple_assign_lhs (def_stmt));
    }
  else
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mult_pattern: detected:\n");

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
                          pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  *type_in = vectype;
  *type_out = vectype;

  return pattern_stmt;
}
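
/* Illustration: with a 32-bit int A,
     res = A * 8    is rewritten as  res = A << 3;
     res = A * -8   is rewritten as  tmp = A << 3;  res = -tmp;
   where in the second form the shift is placed in the pattern def
   sequence and the NEGATE_EXPR is the returned pattern stmt.  */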

/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by if N is a power
    of two constant and type is signed:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced if N is a power of two constant and
    type is signed by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */

static gimple *
vect_recog_divmod_pattern (vec<gimple *> *stmts,
                           tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
                     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
          def_stmt
            = gimple_build_assign (var, COND_EXPR, cond,
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)),
                                   build_int_cst (itype, 0));
          new_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (var, PLUS_EXPR, oprnd0,
                                   gimple_assign_lhs (def_stmt));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          shift = build_int_cst (itype, tree_log2 (oprnd1));
          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   RSHIFT_EXPR, var, shift);
        }
      else
        {
          tree signmask;
          STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
                                              build_int_cst (itype, 1),
                                              build_int_cst (itype, 0));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            {
              tree utype
                = build_nonstandard_integer_type (prec, 1);
              tree vecutype = get_vectype_for_scalar_type (utype);
              tree shift
                = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                        - tree_log2 (oprnd1));
              tree var = vect_recog_temp_ssa_var (utype, NULL);

              def_stmt = gimple_build_assign (var, COND_EXPR, cond,
                                              build_int_cst (utype, -1),
                                              build_int_cst (utype, 0));
              def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              var = vect_recog_temp_ssa_var (utype, NULL);
              def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
                                              gimple_assign_lhs (def_stmt),
                                              shift);
              def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (signmask, NOP_EXPR, var);
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   PLUS_EXPR, oprnd0, signmask);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   MINUS_EXPR, gimple_assign_lhs (def_stmt),
                                   signmask);
        }

      if (dump_enabled_p ())
        dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }
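
  /* Division by a non-power-of-2 constant is handled below with a
     highpart multiply by a fixed-point inverse.  E.g. for unsigned
     32-bit x / 7, choose_multiplier yields ml = 0x24924925 with
     mh != 0 and post_shift == 3, so the code emits
       t1 = x h* 0x24924925;  t2 = x - t1;  t3 = t2 >> 1;
       t4 = t1 + t3;  q = t4 >> 2;  */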
  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
                                  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
        /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
        return NULL;

      /* Find a suitable multiplier and right shift count
         instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
         for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
        {
          pre_shift = floor_log2 (d & -d);
          mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
                                  &ml, &post_shift, &dummy_int);
          gcc_assert (!mh);
        }
      else
        pre_shift = 0;

      if (mh != 0)
        {
          if (post_shift - 1 >= prec)
            return NULL;

          /* t1 = oprnd0 h* ml;
             t2 = oprnd0 - t1;
             t3 = t2 >> 1;
             t4 = t1 + t3;
             q = t4 >> (post_shift - 1);  */
          t1 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                          build_int_cst (itype, ml));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t4 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

          if (post_shift != 1)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t4,
                                       build_int_cst (itype, post_shift - 1));
            }
          else
            q = t4;

          pattern_stmt = def_stmt;
        }
      else
        {
          if (pre_shift >= prec || post_shift >= prec)
            return NULL;

          /* t1 = oprnd0 >> pre_shift;
             t2 = t1 h* ml;
             q = t2 >> post_shift;  */
          if (pre_shift)
            {
              t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
                                       build_int_cst (NULL, pre_shift));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            t1 = oprnd0;

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
                                          build_int_cst (itype, ml));

          if (post_shift)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t2,
                                       build_int_cst (itype, post_shift));
            }
          else
            q = t2;

          pattern_stmt = def_stmt;
        }
    }
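  /* The signed case below mirrors the unsigned one.  E.g. for 32-bit
     x / 7, choose_multiplier at prec - 1 bits yields ml = 0x92492493;
     its top bit is set, so the ADD variant is used (t2 = t1 + x) with
     post_shift == 2, and the quotient is then corrected by subtracting
     the sign bit t4.  */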
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
        return NULL;

      /* Since d might be INT_MIN, we have to cast to
         unsigned HOST_WIDE_INT before negating to avoid
         undefined signed overflow.  */
      abs_d = (d >= 0
               ? (unsigned HOST_WIDE_INT) d
               : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
        {
          d = abs_d;
          oprnd1 = build_int_cst (itype, abs_d);
        }
      else if (HOST_BITS_PER_WIDE_INT >= prec
               && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        /* This case is not handled correctly below.  */
        return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        {
          add = true;
          ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
        }
      if (post_shift >= prec)
        return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                      build_int_cst (itype, ml));

      if (add)
        {
          /* t2 = t1 + oprnd0;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
        }
      else
        t2 = t1;

      if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
                                          build_int_cst (itype, post_shift));
        }
      else
        t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
        {
          if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
            msb = 0;
          else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
            msb = -1;
        }

      if (msb == 0 && d >= 0)
        {
          /* q = t3;  */
          q = t3;
          pattern_stmt = def_stmt;
        }
      else
        {
          /* t4 = oprnd0 >> (prec - 1);
             or if we know from VRP that oprnd0 >= 0
             t4 = 0;
             or if we know from VRP that oprnd0 < 0
             t4 = -1;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t4 = vect_recog_temp_ssa_var (itype, NULL);
          if (msb != 1)
            def_stmt = gimple_build_assign (t4, INTEGER_CST,
                                            build_int_cst (itype, msb));
          else
            def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
                                            build_int_cst (itype, prec - 1));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          /* q = t3 - t4;  or q = t4 - t3;  */
          q = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
                                              d < 0 ? t3 : t4);
        }
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }
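
  /* E.g. x % 7 therefore becomes the division sequence built above
     followed by
       t1 = q * 7;
       r  = x - t1;  */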
  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added.

     a_it = x_t CMP y_t ? b_it : c_it;
     a_T = (TYPE) a_it;  */

static gimple *
vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
           || TREE_CODE (else_clause) != INTEGER_CST)
          && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
        return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
        return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  HOST_WIDE_INT cmp_mode_size
    = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (cmp_mode_size,
                                            TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
           && !int_fits_type_p (then_clause, itype))
          || (TREE_CODE (else_clause) == INTEGER_CST
              && !int_fits_type_p (else_clause, itype)))
        return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                  COND_EXPR, unshare_expr (cond_expr),
                                  fold_convert (itype, then_clause),
                                  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                      NOP_EXPR, gimple_assign_lhs (def_stmt));

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
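
/* Illustration: for
     short x_t, y_t;  int a_T;
     a_T = x_t < y_t ? 17 : 42;
   the comparison works on 16-bit elements while a_T is 32-bit, so the
   pattern emits a 16-bit COND_EXPR into the def sequence and returns
     a_T = (int) a_it;
   letting the select run in the narrower vector type.  */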

/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can and should be optimized that way.  Assume it shouldn't
   in case it's a result of a comparison which can be directly vectorized into
   a vector comparison.  Fills in STMTS with all stmts visited during the
   walk.  */

static bool
check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (stmts.contains (def_stmt))
    return true;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
        return false;
      break;

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      if (! check_bool_pattern (rhs1, vinfo, stmts))
        return false;
      break;

    case BIT_NOT_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
        return false;
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts)
          || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
        return false;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          /* If the comparison can throw, then is_gimple_condexpr will be
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
          if (stmt_could_throw_p (def_stmt))
            return false;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
          if (mask_type
              && expand_vec_cmp_expr_p (comp_vectype, mask_type))
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          if (! expand_vec_cond_expr_p (vecitype, comp_vectype))
            return false;
        }
      else
        return false;
      break;
    }

  bool res = stmts.add (def_stmt);
  /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
  gcc_assert (!res);

  return true;
}

/* Helper function of adjust_bool_pattern.  Add a cast of VAR to TYPE,
   appending the cast stmt to STMT_INFO's pattern def sequence.  */

static tree
adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
{
  gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                           NOP_EXPR, var);
  stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
  set_vinfo_for_stmt (cast_stmt, patt_vinfo);
  STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
  append_pattern_def_seq (stmt_info, cast_stmt);
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   VAR is an SSA_NAME that should be transformed from bool to a wider integer
   type, OUT_TYPE is the desired final integer type of the whole pattern.
   STMT_INFO is the info of the pattern root and is where pattern stmts should
   be associated with.  DEFS is a map of pattern defs.  */

static void
adjust_bool_pattern (tree var, tree out_type,
                     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;
  tree trueval = NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b, c_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
         computes the comparison masks and ands it, in one case with
         all ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          irhs1 = *defs.get (rhs1);
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              rhs_code = def_rhs_code;
              rhs1 = def_rhs1;
              rhs2 = gimple_assign_rhs2 (def_stmt);
              trueval = irhs1;
              goto do_compare;
            }
          else
            irhs2 = *defs.get (rhs2);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          irhs2 = *defs.get (rhs2);
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              rhs_code = def_rhs_code;
              rhs1 = def_rhs1;
              rhs2 = gimple_assign_rhs2 (def_stmt);
              trueval = irhs2;
              goto do_compare;
            }
          else
            irhs1 = *defs.get (rhs1);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = *defs.get (rhs1);
      irhs2 = *defs.get (rhs2);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
                                              stmt_info);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
                                              stmt_info);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
              irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               rhs_code, irhs1, irhs2);
      break;

    default:
    do_compare:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
          || (TYPE_PRECISION (TREE_TYPE (rhs1))
              != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
        {
          machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               COND_EXPR, cond_expr, trueval,
                               build_int_cst (itype, 0));
      break;
    }

  gimple_set_location (pattern_stmt, loc);
  /* ??? Why does vect_mark_pattern_stmts set the vector type on all
     pattern def seq stmts instead of just letting auto-detection do
     its work?  */
  stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
  set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
  STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
  append_pattern_def_seq (stmt_info, pattern_stmt);
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}

/* Comparison function to qsort a vector of gimple stmts after UID.  */

static int
sort_after_uid (const void *p1, const void *p2)
{
  const gimple *stmt1 = *(const gimple * const *)p1;
  const gimple *stmt2 = *(const gimple * const *)p2;
  return gimple_uid (stmt1) - gimple_uid (stmt2);
}

/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */

static tree
adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
                   tree out_type, gimple *stmt)
{
  /* Gather original stmts in the bool pattern in their order of appearance
     in the IL.  */
  auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
  for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
       i != bool_stmt_set.end (); ++i)
    bool_stmts.quick_push (*i);
  bool_stmts.qsort (sort_after_uid);

  /* Now process them in that order, producing pattern stmts.  */
  hash_map <tree, tree> defs;
  for (unsigned i = 0; i < bool_stmts.length (); ++i)
    adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
                         out_type, vinfo_for_stmt (stmt), defs);

  /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
  gimple *pattern_stmt
    = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
  return gimple_assign_lhs (pattern_stmt);
}
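
/* Sorting by UID above reproduces the stmts' order of appearance in the
   IL; since an SSA def appears before its uses, every defs.get () lookup
   performed by adjust_bool_pattern finds an entry already recorded by an
   earlier iteration.  */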

/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that converted value has the
   same number of elements as VAR's vector type.  */

static tree
search_type_for_mask (tree var, vec_info *vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree rhs1;
  enum tree_code rhs_code;
  tree res = NULL_TREE, res2;

  if (TREE_CODE (var) != SSA_NAME)
    return NULL_TREE;

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL_TREE;

  if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
    return NULL_TREE;

  if (dt != vect_internal_def)
    return NULL_TREE;

  if (!is_gimple_assign (def_stmt))
    return NULL_TREE;

  rhs_code = gimple_assign_rhs_code (def_stmt);
  rhs1 = gimple_assign_rhs1 (def_stmt);

  switch (rhs_code)
    {
    case SSA_NAME:
    case BIT_NOT_EXPR:
    CASE_CONVERT:
      res = search_type_for_mask (rhs1, vinfo);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      res = search_type_for_mask (rhs1, vinfo);
      res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
        res = res2;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree comp_vectype, mask_type;

          if (TREE_CODE (TREE_TYPE (rhs1)) == BOOLEAN_TYPE)
            {
              res = search_type_for_mask (rhs1, vinfo);
              res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt),
                                           vinfo);
              if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
                res = res2;
              break;
            }

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return NULL_TREE;

          mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
          if (!mask_type
              || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
            return NULL_TREE;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
              || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
            {
              machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
            }
          else
            res = TREE_TYPE (rhs1);
        }
      break;
    }

  return res;
}
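
/* E.g. for a mask defined as m = i_4 > i_5 with 32-bit int operands,
   the comparison path above returns the unsigned 32-bit type built by
   build_nonstandard_integer_type, so a value of that type has as many
   vector elements as the comparison itself.  */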

/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
                search begins, i.e. cast of a bool to
                an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

        Assuming size of TYPE is the same as size of all comparisons
        (otherwise some casts would be added where needed), the above
        sequence we create related pattern stmts:
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
        S5'  e_T = c_T | d_T;
        S6'  f_T = e_T;

        Instead of the above S3' we could emit:
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
        S3'  c_T = a_T | b_T;
        but the above is more efficient.  */

static gimple *
vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
                         tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  stmt_vec_info new_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  hash_set<gimple *> bool_stmts;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
        return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
        {
          rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
          lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
            pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
          else
            pattern_stmt
              = gimple_build_assign (lhs, NOP_EXPR, rhs);
        }
      else
        {
          tree type = search_type_for_mask (var, vinfo);
          tree cst0, cst1, tmp;

          if (!type)
            return NULL;

          /* We may directly use cond with narrowed type to avoid
             multiple cond exprs with following result packing and
             perform single cond with packed mask instead.  In case
             of widening we better make cond first and then extract
             results.  */
          if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
            type = TREE_TYPE (lhs);

          cst0 = build_int_cst (type, 0);
          cst1 = build_int_cst (type, 1);
          tmp = vect_recog_temp_ssa_var (type, NULL);
          pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

          if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
            {
              tree new_vectype = get_vectype_for_scalar_type (type);
              new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
              set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
              STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
              new_pattern_def_seq (stmt_vinfo, pattern_stmt);

              lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
              pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
            }
        }

      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
           && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
         size and number of elements.  */
      unsigned prec
        = wi::udiv_trunc (TYPE_SIZE (vectype),
                          TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
        = build_nonstandard_integer_type (prec,
                                          TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, vinfo, bool_stmts))
        return NULL;

      rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
        = gimple_build_assign (lhs, COND_EXPR,
                               build2 (NE_EXPR, boolean_type_node,
                                       rhs, build_int_cst (type, 0)),
                               gimple_assign_rhs2 (last_stmt),
                               gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
        rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
      else
        {
          tree type = search_type_for_mask (var, vinfo);
          tree cst0, cst1, new_vectype;

          if (!type)
            return NULL;

          if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
            type = TREE_TYPE (vectype);

          cst0 = build_int_cst (type, 0);
          cst1 = build_int_cst (type, 1);
          new_vectype = get_vectype_for_scalar_type (type);

          rhs = vect_recog_temp_ssa_var (type, NULL);
          pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);

          pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
          set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
          STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
          append_pattern_def_seq (stmt_vinfo, pattern_stmt);
        }

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
          append_pattern_def_seq (stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}

/* A helper for vect_recog_mask_conversion_pattern.  Build
   conversion of MASK to a type suitable for masking VECTYPE.
   Built statement gets required vectype and is appended to
   a pattern sequence of STMT_VINFO.

   Return converted mask.  */

static tree
build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
                       vec_info *vinfo)
{
  gimple *stmt;
  tree masktype, tmp;
  stmt_vec_info new_stmt_info;

  masktype = build_same_sized_truth_vector_type (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  new_stmt_info = new_stmt_vec_info (stmt, vinfo);
  set_vinfo_for_stmt (stmt, new_stmt_info);
  STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
  append_pattern_def_seq (stmt_vinfo, stmt);

  return tmp;
}
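
/* E.g. for a mask produced by comparing ints and a VECTYPE of chars,
   this emits one CONVERT_EXPR to the char-sized truth vector type so
   that the converted mask has one element per element of VECTYPE.  */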

/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32]) m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8]) m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */

static gimple *
vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  stmt_vec_info pattern_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt)
      && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
          || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
    {
      bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);

      if (load)
        {
          lhs = gimple_call_lhs (last_stmt);
          vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
        }
      else
        {
          rhs2 = gimple_call_arg (last_stmt, 3);
          vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
        }

      rhs1 = gimple_call_arg (last_stmt, 2);
      rhs1_type = search_type_for_mask (rhs1, vinfo);
      if (!rhs1_type)
        return NULL;
      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
          || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
        return NULL;

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      if (load)
        {
          lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          pattern_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3,
                                          gimple_call_arg (last_stmt, 0),
                                          gimple_call_arg (last_stmt, 1),
                                          tmp);
          gimple_call_set_lhs (pattern_stmt, lhs);
        }
      else
        pattern_stmt
          = gimple_build_call_internal (IFN_MASK_STORE, 4,
                                        gimple_call_arg (last_stmt, 0),
                                        gimple_call_arg (last_stmt, 1),
                                        tmp,
                                        gimple_call_arg (last_stmt, 3));

      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      /* vect_recog_mixed_size_cond_pattern could apply.
         Do nothing then.  */
      if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
        return NULL;

      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
        {
          rhs1_type = search_type_for_mask (rhs1, vinfo);
          if (!rhs1_type)
            return NULL;
        }
      else if (COMPARISON_CLASS_P (rhs1))
        rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
      else
        return NULL;

      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
          || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
        return NULL;

      /* If rhs1 is a comparison we need to move it into a
         separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
        {
          tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
          pattern_stmt = gimple_build_assign (tmp, rhs1);
          rhs1 = tmp;

          pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
          set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
          STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
          append_pattern_def_seq (stmt_vinfo, pattern_stmt);
        }

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
                                          gimple_assign_rhs2 (last_stmt),
                                          gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion for
     one of operands.  */
  if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = search_type_for_mask (rhs1, vinfo);
  rhs2_type = search_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
      if (!vectype1)
        return NULL;
      rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
      if (!vectype1)
        return NULL;
      rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  *type_in = vectype1;
  stmts->safe_push (last_stmt);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mask_conversion_pattern: detected:\n");

  return pattern_stmt;
}

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  vec_info *vinfo = orig_stmt_info->vinfo;
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
           !gsi_end_p (si); gsi_next (&si))
        {
          def_stmt = gsi_stmt (si);
          def_stmt_info = vinfo_for_stmt (def_stmt);
          if (def_stmt_info == NULL)
            {
              def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
            }
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
          STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
        }
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */

static bool
vect_pattern_recog_1 (vect_recog_func *recog_func,
                      gimple_stmt_iterator si,
                      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return false;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_BOOLEAN_TYPE_P (type_in)
      || VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return false;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return false;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return false;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "%s pattern recognized: ", recog_func->name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
              && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }

  return true;
}

/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  auto_vec<gimple *, 1> stmts_to_replace;
  gimple *stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===\n");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
         functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              /* Scan over all generic vect_recog_xxx_pattern functions.  */
              for (j = 0; j < NUM_PATTERNS; j++)
                if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
                                          &stmts_to_replace))
                  break;
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
           gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
        {
          if ((stmt = gsi_stmt (si))
              && vinfo_for_stmt (stmt)
              && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
                                      &stmts_to_replace))
              break;
        }
    }
}