1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
33 #include "dominance.h"
34 #include "gimple-pretty-print.h"
35 #include "internal-fn.h"
38 #include "gimple-iterator.h"
41 #include "insn-config.h"
43 #include "insn-codes.h"
44 #include "optabs-tree.h"
46 #include "tree-data-ref.h"
47 #include "tree-vectorizer.h"
48 #include "recog.h" /* FIXME: for insn_data */
49 #include "diagnostic-core.h"
53 /* Pattern recognition functions */
54 static gimple
*vect_recog_widen_sum_pattern (vec
<gimple
*> *, tree
*,
56 static gimple
*vect_recog_widen_mult_pattern (vec
<gimple
*> *, tree
*,
58 static gimple
*vect_recog_dot_prod_pattern (vec
<gimple
*> *, tree
*,
60 static gimple
*vect_recog_sad_pattern (vec
<gimple
*> *, tree
*,
62 static gimple
*vect_recog_pow_pattern (vec
<gimple
*> *, tree
*, tree
*);
63 static gimple
*vect_recog_over_widening_pattern (vec
<gimple
*> *, tree
*,
65 static gimple
*vect_recog_widen_shift_pattern (vec
<gimple
*> *,
67 static gimple
*vect_recog_rotate_pattern (vec
<gimple
*> *, tree
*, tree
*);
68 static gimple
*vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *,
70 static gimple
*vect_recog_divmod_pattern (vec
<gimple
*> *,
73 static gimple
*vect_recog_mult_pattern (vec
<gimple
*> *,
76 static gimple
*vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *,
78 static gimple
*vect_recog_bool_pattern (vec
<gimple
*> *, tree
*, tree
*);
79 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
80 vect_recog_widen_mult_pattern
,
81 vect_recog_widen_sum_pattern
,
82 vect_recog_dot_prod_pattern
,
83 vect_recog_sad_pattern
,
84 vect_recog_pow_pattern
,
85 vect_recog_widen_shift_pattern
,
86 vect_recog_over_widening_pattern
,
87 vect_recog_rotate_pattern
,
88 vect_recog_vector_vector_shift_pattern
,
89 vect_recog_divmod_pattern
,
90 vect_recog_mult_pattern
,
91 vect_recog_mixed_size_cond_pattern
,
92 vect_recog_bool_pattern
};
95 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
97 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
102 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
104 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
105 append_pattern_def_seq (stmt_info
, stmt
);
108 /* Check whether STMT2 is in the same loop or basic block as STMT1.
109 Which of the two applies depends on whether we're currently doing
110 loop-based or basic-block-based vectorization, as determined by
111 the vinfo_for_stmt for STMT1 (which must be defined).
113 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
114 to be defined as well. */
117 vect_same_loop_or_bb_p (gimple
*stmt1
, gimple
*stmt2
)
119 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
120 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
121 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
123 if (!gimple_bb (stmt2
))
128 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
129 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
134 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
135 || gimple_code (stmt2
) == GIMPLE_PHI
)
139 gcc_assert (vinfo_for_stmt (stmt2
));
143 /* If the LHS of DEF_STMT has a single use, and that statement is
144 in the same loop or basic block, return it. */
147 vect_single_imm_use (gimple
*def_stmt
)
149 tree lhs
= gimple_assign_lhs (def_stmt
);
153 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
156 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
162 /* Check whether NAME, an ssa-name used in USE_STMT,
163 is a result of a type promotion, such that:
164 DEF_STMT: NAME = NOP (name0)
165 If CHECK_SIGN is TRUE, check that either both types are signed or both are
169 type_conversion_p (tree name
, gimple
*use_stmt
, bool check_sign
,
170 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
173 gimple
*dummy_gimple
;
174 stmt_vec_info stmt_vinfo
;
175 tree type
= TREE_TYPE (name
);
177 enum vect_def_type dt
;
180 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
181 if (!vect_is_simple_use (name
, use_stmt
, stmt_vinfo
->vinfo
, def_stmt
,
185 if (dt
!= vect_internal_def
186 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
192 if (!is_gimple_assign (*def_stmt
))
195 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
198 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
200 *orig_type
= TREE_TYPE (oprnd0
);
201 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
202 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
205 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
210 if (!vect_is_simple_use (oprnd0
, *def_stmt
, stmt_vinfo
->vinfo
,
211 &dummy_gimple
, &dummy
, &dt
))
217 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
218 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
221 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
223 return make_temp_ssa_name (type
, stmt
, "patt");
226 /* Function vect_recog_dot_prod_pattern
228 Try to find the following pattern:
234 sum_0 = phi <init, sum_1>
237 S3 x_T = (TYPE1) x_t;
238 S4 y_T = (TYPE1) y_t;
240 [S6 prod = (TYPE2) prod; #optional]
241 S7 sum_1 = prod + sum_0;
243 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
244 same size of 'TYPE1' or bigger. This is a special case of a reduction
249 * STMTS: Contains a stmt from which the pattern search begins. In the
250 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
255 * TYPE_IN: The type of the input arguments to the pattern.
257 * TYPE_OUT: The type of the output of this pattern.
259 * Return value: A new stmt that will be used to replace the sequence of
260 stmts that constitute the pattern. In this case it will be:
261 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
263 Note: The dot-prod idiom is a widening reduction pattern that is
264 vectorized without preserving all the intermediate results. It
265 produces only N/2 (widened) results (by summing up pairs of
266 intermediate results) rather than all N results. Therefore, we
267 cannot allow this pattern when we want to get all the results and in
268 the correct order (as is the case when this computation is in an
269 inner-loop nested in an outer-loop that us being vectorized). */
272 vect_recog_dot_prod_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
275 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
277 tree oprnd00
, oprnd01
;
278 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
279 tree type
, half_type
;
280 gimple
*pattern_stmt
;
282 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
290 loop
= LOOP_VINFO_LOOP (loop_info
);
292 /* We don't allow changing the order of the computation in the inner-loop
293 when doing outer-loop vectorization. */
294 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
297 if (!is_gimple_assign (last_stmt
))
300 type
= gimple_expr_type (last_stmt
);
302 /* Look for the following pattern
306 DDPROD = (TYPE2) DPROD;
307 sum_1 = DDPROD + sum_0;
309 - DX is double the size of X
310 - DY is double the size of Y
311 - DX, DY, DPROD all have the same type
312 - sum is the same size of DPROD or bigger
313 - sum has been recognized as a reduction variable.
315 This is equivalent to:
316 DPROD = X w* Y; #widen mult
317 sum_1 = DPROD w+ sum_0; #widen summation
319 DPROD = X w* Y; #widen mult
320 sum_1 = DPROD + sum_0; #summation
323 /* Starting from LAST_STMT, follow the defs of its uses in search
324 of the above pattern. */
326 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
329 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
331 /* Has been detected as widening-summation? */
333 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
334 type
= gimple_expr_type (stmt
);
335 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
337 oprnd0
= gimple_assign_rhs1 (stmt
);
338 oprnd1
= gimple_assign_rhs2 (stmt
);
339 half_type
= TREE_TYPE (oprnd0
);
345 oprnd0
= gimple_assign_rhs1 (last_stmt
);
346 oprnd1
= gimple_assign_rhs2 (last_stmt
);
347 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
348 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
352 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
357 oprnd0
= gimple_assign_rhs1 (stmt
);
363 /* So far so good. Since last_stmt was detected as a (summation) reduction,
364 we know that oprnd1 is the reduction variable (defined by a loop-header
365 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
366 Left to check that oprnd0 is defined by a (widen_)mult_expr */
367 if (TREE_CODE (oprnd0
) != SSA_NAME
)
370 prod_type
= half_type
;
371 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
373 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
374 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
377 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
378 inside the loop (in case we are analyzing an outer-loop). */
379 if (!is_gimple_assign (stmt
))
381 stmt_vinfo
= vinfo_for_stmt (stmt
);
382 gcc_assert (stmt_vinfo
);
383 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
385 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
387 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
389 /* Has been detected as a widening multiplication? */
391 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
392 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
394 stmt_vinfo
= vinfo_for_stmt (stmt
);
395 gcc_assert (stmt_vinfo
);
396 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
397 oprnd00
= gimple_assign_rhs1 (stmt
);
398 oprnd01
= gimple_assign_rhs2 (stmt
);
399 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt
))
400 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
404 tree half_type0
, half_type1
;
408 oprnd0
= gimple_assign_rhs1 (stmt
);
409 oprnd1
= gimple_assign_rhs2 (stmt
);
410 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
411 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
413 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
417 oprnd00
= gimple_assign_rhs1 (def_stmt
);
418 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
422 oprnd01
= gimple_assign_rhs1 (def_stmt
);
423 if (!types_compatible_p (half_type0
, half_type1
))
425 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
429 half_type
= TREE_TYPE (oprnd00
);
430 *type_in
= half_type
;
433 /* Pattern detected. Create a stmt to be used to replace the pattern: */
434 var
= vect_recog_temp_ssa_var (type
, NULL
);
435 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
436 oprnd00
, oprnd01
, oprnd1
);
438 if (dump_enabled_p ())
440 dump_printf_loc (MSG_NOTE
, vect_location
,
441 "vect_recog_dot_prod_pattern: detected: ");
442 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
443 dump_printf (MSG_NOTE
, "\n");
450 /* Function vect_recog_sad_pattern
452 Try to find the following Sum of Absolute Difference (SAD) pattern:
455 signed TYPE1 diff, abs_diff;
458 sum_0 = phi <init, sum_1>
461 S3 x_T = (TYPE1) x_t;
462 S4 y_T = (TYPE1) y_t;
464 S6 abs_diff = ABS_EXPR <diff>;
465 [S7 abs_diff = (TYPE2) abs_diff; #optional]
466 S8 sum_1 = abs_diff + sum_0;
468 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
469 same size of 'TYPE1' or bigger. This is a special case of a reduction
474 * STMTS: Contains a stmt from which the pattern search begins. In the
475 example, when this function is called with S8, the pattern
476 {S3,S4,S5,S6,S7,S8} will be detected.
480 * TYPE_IN: The type of the input arguments to the pattern.
482 * TYPE_OUT: The type of the output of this pattern.
484 * Return value: A new stmt that will be used to replace the sequence of
485 stmts that constitute the pattern. In this case it will be:
486 SAD_EXPR <x_t, y_t, sum_0>
490 vect_recog_sad_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
493 gimple
*last_stmt
= (*stmts
)[0];
494 tree sad_oprnd0
, sad_oprnd1
;
495 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
497 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
504 loop
= LOOP_VINFO_LOOP (loop_info
);
506 /* We don't allow changing the order of the computation in the inner-loop
507 when doing outer-loop vectorization. */
508 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
511 if (!is_gimple_assign (last_stmt
))
514 tree sum_type
= gimple_expr_type (last_stmt
);
516 /* Look for the following pattern
520 DAD = ABS_EXPR <DDIFF>;
521 DDPROD = (TYPE2) DPROD;
524 - DX is at least double the size of X
525 - DY is at least double the size of Y
526 - DX, DY, DDIFF, DAD all have the same type
527 - sum is the same size of DAD or bigger
528 - sum has been recognized as a reduction variable.
530 This is equivalent to:
531 DDIFF = X w- Y; #widen sub
532 DAD = ABS_EXPR <DDIFF>;
533 sum_1 = DAD w+ sum_0; #widen summation
535 DDIFF = X w- Y; #widen sub
536 DAD = ABS_EXPR <DDIFF>;
537 sum_1 = DAD + sum_0; #summation
540 /* Starting from LAST_STMT, follow the defs of its uses in search
541 of the above pattern. */
543 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
546 tree plus_oprnd0
, plus_oprnd1
;
548 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
550 /* Has been detected as widening-summation? */
552 gimple
*stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
553 sum_type
= gimple_expr_type (stmt
);
554 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
556 plus_oprnd0
= gimple_assign_rhs1 (stmt
);
557 plus_oprnd1
= gimple_assign_rhs2 (stmt
);
558 half_type
= TREE_TYPE (plus_oprnd0
);
564 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
565 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
566 if (!types_compatible_p (TREE_TYPE (plus_oprnd0
), sum_type
)
567 || !types_compatible_p (TREE_TYPE (plus_oprnd1
), sum_type
))
570 /* The type conversion could be promotion, demotion,
571 or just signed -> unsigned. */
572 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
573 &half_type
, &def_stmt
, &promotion
))
574 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
576 half_type
= sum_type
;
579 /* So far so good. Since last_stmt was detected as a (summation) reduction,
580 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
581 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
582 Then check that plus_oprnd0 is defined by an abs_expr. */
584 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
587 tree abs_type
= half_type
;
588 gimple
*abs_stmt
= SSA_NAME_DEF_STMT (plus_oprnd0
);
590 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
591 if (!gimple_bb (abs_stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (abs_stmt
)))
594 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
595 inside the loop (in case we are analyzing an outer-loop). */
596 if (!is_gimple_assign (abs_stmt
))
599 stmt_vec_info abs_stmt_vinfo
= vinfo_for_stmt (abs_stmt
);
600 gcc_assert (abs_stmt_vinfo
);
601 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo
) != vect_internal_def
)
603 if (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
)
606 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
607 if (!types_compatible_p (TREE_TYPE (abs_oprnd
), abs_type
))
609 if (TYPE_UNSIGNED (abs_type
))
612 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
614 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
617 gimple
*diff_stmt
= SSA_NAME_DEF_STMT (abs_oprnd
);
619 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
620 if (!gimple_bb (diff_stmt
)
621 || !flow_bb_inside_loop_p (loop
, gimple_bb (diff_stmt
)))
624 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
625 inside the loop (in case we are analyzing an outer-loop). */
626 if (!is_gimple_assign (diff_stmt
))
629 stmt_vec_info diff_stmt_vinfo
= vinfo_for_stmt (diff_stmt
);
630 gcc_assert (diff_stmt_vinfo
);
631 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo
) != vect_internal_def
)
633 if (gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
636 tree half_type0
, half_type1
;
639 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
640 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
642 if (!types_compatible_p (TREE_TYPE (minus_oprnd0
), abs_type
)
643 || !types_compatible_p (TREE_TYPE (minus_oprnd1
), abs_type
))
645 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
646 &half_type0
, &def_stmt
, &promotion
)
649 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
651 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
652 &half_type1
, &def_stmt
, &promotion
)
655 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
657 if (!types_compatible_p (half_type0
, half_type1
))
659 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
660 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
663 *type_in
= TREE_TYPE (sad_oprnd0
);
664 *type_out
= sum_type
;
666 /* Pattern detected. Create a stmt to be used to replace the pattern: */
667 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
668 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
669 sad_oprnd1
, plus_oprnd1
);
671 if (dump_enabled_p ())
673 dump_printf_loc (MSG_NOTE
, vect_location
,
674 "vect_recog_sad_pattern: detected: ");
675 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
676 dump_printf (MSG_NOTE
, "\n");
683 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
686 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
687 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
689 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
690 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
691 that satisfies the above restrictions, we can perform a widening opeartion
692 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
693 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
696 vect_handle_widen_op_by_const (gimple
*stmt
, enum tree_code code
,
697 tree const_oprnd
, tree
*oprnd
,
698 gimple
**wstmt
, tree type
,
699 tree
*half_type
, gimple
*def_stmt
)
701 tree new_type
, new_oprnd
;
703 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
706 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
707 || (code
== LSHIFT_EXPR
708 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
710 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
712 /* CONST_OPRND is a constant of HALF_TYPE. */
713 *oprnd
= gimple_assign_rhs1 (def_stmt
);
717 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
720 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
723 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
724 a type 2 times bigger than HALF_TYPE. */
725 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
726 TYPE_UNSIGNED (type
));
727 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
728 || (code
== LSHIFT_EXPR
729 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
732 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
733 *oprnd
= gimple_assign_rhs1 (def_stmt
);
734 new_oprnd
= make_ssa_name (new_type
);
735 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
738 *half_type
= new_type
;
743 /* Function vect_recog_widen_mult_pattern
745 Try to find the following pattern:
749 TYPE a_T, b_T, prod_T;
755 S5 prod_T = a_T * b_T;
757 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
759 Also detect unsigned cases:
763 unsigned TYPE u_prod_T;
764 TYPE a_T, b_T, prod_T;
770 S5 prod_T = a_T * b_T;
771 S6 u_prod_T = (unsigned TYPE) prod_T;
773 and multiplication by constants:
780 S5 prod_T = a_T * CONST;
782 A special case of multiplication by constants is when 'TYPE' is 4 times
783 bigger than 'type', but CONST fits an intermediate type 2 times smaller
784 than 'TYPE'. In that case we create an additional pattern stmt for S3
785 to create a variable of the intermediate type, and perform widen-mult
786 on the intermediate type as well:
790 TYPE a_T, prod_T, prod_T';
794 '--> a_it = (interm_type) a_t;
795 S5 prod_T = a_T * CONST;
796 '--> prod_T' = a_it w* CONST;
800 * STMTS: Contains a stmt from which the pattern search begins. In the
801 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
802 is detected. In case of unsigned widen-mult, the original stmt (S5) is
803 replaced with S6 in STMTS. In case of multiplication by a constant
804 of an intermediate type (the last case above), STMTS also contains S3
805 (inserted before S5).
809 * TYPE_IN: The type of the input arguments to the pattern.
811 * TYPE_OUT: The type of the output of this pattern.
813 * Return value: A new stmt that will be used to replace the sequence of
814 stmts that constitute the pattern. In this case it will be:
815 WIDEN_MULT <a_t, b_t>
816 If the result of WIDEN_MULT needs to be converted to a larger type, the
817 returned stmt will be this type conversion stmt.
821 vect_recog_widen_mult_pattern (vec
<gimple
*> *stmts
,
822 tree
*type_in
, tree
*type_out
)
824 gimple
*last_stmt
= stmts
->pop ();
825 gimple
*def_stmt0
, *def_stmt1
;
827 tree type
, half_type0
, half_type1
;
828 gimple
*new_stmt
= NULL
, *pattern_stmt
= NULL
;
829 tree vectype
, vecitype
;
831 enum tree_code dummy_code
;
837 if (!is_gimple_assign (last_stmt
))
840 type
= gimple_expr_type (last_stmt
);
842 /* Starting from LAST_STMT, follow the defs of its uses in search
843 of the above pattern. */
845 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
848 oprnd0
= gimple_assign_rhs1 (last_stmt
);
849 oprnd1
= gimple_assign_rhs2 (last_stmt
);
850 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
851 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
854 /* Check argument 0. */
855 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
859 /* Check argument 1. */
860 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
861 &def_stmt1
, &promotion
);
863 if (op1_ok
&& promotion
)
865 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
866 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
870 if (TREE_CODE (oprnd1
) == INTEGER_CST
871 && TREE_CODE (half_type0
) == INTEGER_TYPE
872 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
873 &oprnd0
, &new_stmt
, type
,
874 &half_type0
, def_stmt0
))
876 half_type1
= half_type0
;
877 oprnd1
= fold_convert (half_type1
, oprnd1
);
883 /* If the two arguments have different sizes, convert the one with
884 the smaller type into the larger type. */
885 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
887 /* If we already used up the single-stmt slot give up. */
892 gimple
*def_stmt
= NULL
;
894 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
896 def_stmt
= def_stmt0
;
897 half_type0
= half_type1
;
902 def_stmt
= def_stmt1
;
903 half_type1
= half_type0
;
907 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
908 tree new_oprnd
= make_ssa_name (half_type0
);
909 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
913 /* Handle unsigned case. Look for
914 S6 u_prod_T = (unsigned TYPE) prod_T;
915 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
916 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
922 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
925 use_stmt
= vect_single_imm_use (last_stmt
);
926 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
927 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
930 use_lhs
= gimple_assign_lhs (use_stmt
);
931 use_type
= TREE_TYPE (use_lhs
);
932 if (!INTEGRAL_TYPE_P (use_type
)
933 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
934 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
938 last_stmt
= use_stmt
;
941 if (!types_compatible_p (half_type0
, half_type1
))
944 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
945 to get an intermediate result of type ITYPE. In this case we need
946 to build a statement to convert this intermediate result to type TYPE. */
948 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
949 itype
= build_nonstandard_integer_type
950 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
951 TYPE_UNSIGNED (type
));
953 /* Pattern detected. */
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_NOTE
, vect_location
,
956 "vect_recog_widen_mult_pattern: detected:\n");
958 /* Check target support */
959 vectype
= get_vectype_for_scalar_type (half_type0
);
960 vecitype
= get_vectype_for_scalar_type (itype
);
963 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
965 &dummy_code
, &dummy_code
,
966 &dummy_int
, &dummy_vec
))
970 *type_out
= get_vectype_for_scalar_type (type
);
972 /* Pattern supported. Create a stmt to be used to replace the pattern: */
973 var
= vect_recog_temp_ssa_var (itype
, NULL
);
974 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
976 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
977 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
979 /* If the original two operands have different sizes, we may need to convert
980 the smaller one into the larget type. If this is the case, at this point
981 the new stmt is already built. */
984 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
985 stmt_vec_info new_stmt_info
986 = new_stmt_vec_info (new_stmt
, stmt_vinfo
->vinfo
);
987 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
988 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
991 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
992 the result of the widen-mult operation into type TYPE. */
995 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
996 stmt_vec_info pattern_stmt_info
997 = new_stmt_vec_info (pattern_stmt
, stmt_vinfo
->vinfo
);
998 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
999 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
1000 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
1002 gimple_assign_lhs (pattern_stmt
));
1005 if (dump_enabled_p ())
1006 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1008 stmts
->safe_push (last_stmt
);
1009 return pattern_stmt
;
1013 /* Function vect_recog_pow_pattern
1015 Try to find the following pattern:
1019 with POW being one of pow, powf, powi, powif and N being
1024 * LAST_STMT: A stmt from which the pattern search begins.
1028 * TYPE_IN: The type of the input arguments to the pattern.
1030 * TYPE_OUT: The type of the output of this pattern.
1032 * Return value: A new stmt that will be used to replace the sequence of
1033 stmts that constitute the pattern. In this case it will be:
1040 vect_recog_pow_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1043 gimple
*last_stmt
= (*stmts
)[0];
1044 tree fn
, base
, exp
= NULL
;
1048 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1051 fn
= gimple_call_fndecl (last_stmt
);
1052 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
1055 switch (DECL_FUNCTION_CODE (fn
))
1057 case BUILT_IN_POWIF
:
1061 base
= gimple_call_arg (last_stmt
, 0);
1062 exp
= gimple_call_arg (last_stmt
, 1);
1063 if (TREE_CODE (exp
) != REAL_CST
1064 && TREE_CODE (exp
) != INTEGER_CST
)
1072 /* We now have a pow or powi builtin function call with a constant
1075 *type_out
= NULL_TREE
;
1077 /* Catch squaring. */
1078 if ((tree_fits_shwi_p (exp
)
1079 && tree_to_shwi (exp
) == 2)
1080 || (TREE_CODE (exp
) == REAL_CST
1081 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1083 *type_in
= TREE_TYPE (base
);
1085 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1086 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1090 /* Catch square root. */
1091 if (TREE_CODE (exp
) == REAL_CST
1092 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1094 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
1095 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1098 gcall
*stmt
= gimple_build_call (newfn
, 1, base
);
1099 if (vectorizable_function (stmt
, *type_in
, *type_in
)
1102 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1103 gimple_call_set_lhs (stmt
, var
);
1113 /* Function vect_recog_widen_sum_pattern
1115 Try to find the following pattern:
1118 TYPE x_T, sum = init;
1120 sum_0 = phi <init, sum_1>
1122 S2 x_T = (TYPE) x_t;
1123 S3 sum_1 = x_T + sum_0;
1125 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1126 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1127 a special case of a reduction computation.
1131 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1132 when this function is called with S3, the pattern {S2,S3} will be detected.
1136 * TYPE_IN: The type of the input arguments to the pattern.
1138 * TYPE_OUT: The type of the output of this pattern.
1140 * Return value: A new stmt that will be used to replace the sequence of
1141 stmts that constitute the pattern. In this case it will be:
1142 WIDEN_SUM <x_t, sum_0>
1144 Note: The widening-sum idiom is a widening reduction pattern that is
1145 vectorized without preserving all the intermediate results. It
1146 produces only N/2 (widened) results (by summing up pairs of
1147 intermediate results) rather than all N results. Therefore, we
1148 cannot allow this pattern when we want to get all the results and in
1149 the correct order (as is the case when this computation is in an
1150 inner-loop nested in an outer-loop that us being vectorized). */
1153 vect_recog_widen_sum_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1156 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
1157 tree oprnd0
, oprnd1
;
1158 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1159 tree type
, half_type
;
1160 gimple
*pattern_stmt
;
1161 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1169 loop
= LOOP_VINFO_LOOP (loop_info
);
1171 /* We don't allow changing the order of the computation in the inner-loop
1172 when doing outer-loop vectorization. */
1173 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1176 if (!is_gimple_assign (last_stmt
))
1179 type
= gimple_expr_type (last_stmt
);
1181 /* Look for the following pattern
1184 In which DX is at least double the size of X, and sum_1 has been
1185 recognized as a reduction variable.
1188 /* Starting from LAST_STMT, follow the defs of its uses in search
1189 of the above pattern. */
1191 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1194 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1195 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1196 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
1197 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
1200 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1201 we know that oprnd1 is the reduction variable (defined by a loop-header
1202 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1203 Left to check that oprnd0 is defined by a cast from type 'type' to type
1206 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1211 oprnd0
= gimple_assign_rhs1 (stmt
);
1212 *type_in
= half_type
;
1215 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1216 var
= vect_recog_temp_ssa_var (type
, NULL
);
1217 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1219 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE
, vect_location
,
1222 "vect_recog_widen_sum_pattern: detected: ");
1223 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1224 dump_printf (MSG_NOTE
, "\n");
1227 return pattern_stmt
;
1231 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1234 STMT - a statement to check.
1235 DEF - we support operations with two operands, one of which is constant.
1236 The other operand can be defined by a demotion operation, or by a
1237 previous statement in a sequence of over-promoted operations. In the
1238 later case DEF is used to replace that operand. (It is defined by a
1239 pattern statement we created for the previous statement in the
1243 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1244 NULL, it's the type of DEF.
1245 STMTS - additional pattern statements. If a pattern statement (type
1246 conversion) is created in this function, its original statement is
1250 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1251 operands to use in the new pattern statement for STMT (will be created
1252 in vect_recog_over_widening_pattern ()).
1253 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1254 statements for STMT: the first one is a type promotion and the second
1255 one is the operation itself. We return the type promotion statement
1256 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1257 the second pattern statement. */
1260 vect_operation_fits_smaller_type (gimple
*stmt
, tree def
, tree
*new_type
,
1261 tree
*op0
, tree
*op1
, gimple
**new_def_stmt
,
1262 vec
<gimple
*> *stmts
)
1264 enum tree_code code
;
1265 tree const_oprnd
, oprnd
;
1266 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1267 gimple
*def_stmt
, *new_stmt
;
1273 *new_def_stmt
= NULL
;
1275 if (!is_gimple_assign (stmt
))
1278 code
= gimple_assign_rhs_code (stmt
);
1279 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1280 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1283 oprnd
= gimple_assign_rhs1 (stmt
);
1284 const_oprnd
= gimple_assign_rhs2 (stmt
);
1285 type
= gimple_expr_type (stmt
);
1287 if (TREE_CODE (oprnd
) != SSA_NAME
1288 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1291 /* If oprnd has other uses besides that in stmt we cannot mark it
1292 as being part of a pattern only. */
1293 if (!has_single_use (oprnd
))
1296 /* If we are in the middle of a sequence, we use DEF from a previous
1297 statement. Otherwise, OPRND has to be a result of type promotion. */
1300 half_type
= *new_type
;
1306 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1309 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1313 /* Can we perform the operation on a smaller type? */
1319 if (!int_fits_type_p (const_oprnd
, half_type
))
1321 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1322 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1325 interm_type
= build_nonstandard_integer_type (
1326 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1327 if (!int_fits_type_p (const_oprnd
, interm_type
))
1334 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1335 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1338 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1339 (e.g., if the original value was char, the shift amount is at most 8
1340 if we want to use short). */
1341 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1344 interm_type
= build_nonstandard_integer_type (
1345 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1347 if (!vect_supportable_shift (code
, interm_type
))
1353 if (vect_supportable_shift (code
, half_type
))
1356 /* Try intermediate type - HALF_TYPE is not supported. */
1357 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1360 interm_type
= build_nonstandard_integer_type (
1361 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1363 if (!vect_supportable_shift (code
, interm_type
))
1372 /* There are four possible cases:
1373 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1374 the first statement in the sequence)
1375 a. The original, HALF_TYPE, is not enough - we replace the promotion
1376 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1377 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1379 2. OPRND is defined by a pattern statement we created.
1380 a. Its type is not sufficient for the operation, we create a new stmt:
1381 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1382 this statement in NEW_DEF_STMT, and it is later put in
1383 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1384 b. OPRND is good to use in the new statement. */
1389 /* Replace the original type conversion HALF_TYPE->TYPE with
1390 HALF_TYPE->INTERM_TYPE. */
1391 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1393 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1394 /* Check if the already created pattern stmt is what we need. */
1395 if (!is_gimple_assign (new_stmt
)
1396 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1397 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1400 stmts
->safe_push (def_stmt
);
1401 oprnd
= gimple_assign_lhs (new_stmt
);
1405 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1406 oprnd
= gimple_assign_rhs1 (def_stmt
);
1407 new_oprnd
= make_ssa_name (interm_type
);
1408 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1409 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1410 stmts
->safe_push (def_stmt
);
1416 /* Retrieve the operand before the type promotion. */
1417 oprnd
= gimple_assign_rhs1 (def_stmt
);
1424 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1425 new_oprnd
= make_ssa_name (interm_type
);
1426 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1428 *new_def_stmt
= new_stmt
;
1431 /* Otherwise, OPRND is already set. */
1435 *new_type
= interm_type
;
1437 *new_type
= half_type
;
1440 *op1
= fold_convert (*new_type
, const_oprnd
);
1446 /* Try to find a statement or a sequence of statements that can be performed
1450 TYPE x_T, res0_T, res1_T;
1453 S2 x_T = (TYPE) x_t;
1454 S3 res0_T = op (x_T, C0);
1455 S4 res1_T = op (res0_T, C1);
1456 S5 ... = () res1_T; - type demotion
1458 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1460 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1461 be 'type' or some intermediate type. For now, we expect S5 to be a type
1462 demotion operation. We also check that S3 and S4 have only one use. */
1465 vect_recog_over_widening_pattern (vec
<gimple
*> *stmts
,
1466 tree
*type_in
, tree
*type_out
)
1468 gimple
*stmt
= stmts
->pop ();
1469 gimple
*pattern_stmt
= NULL
, *new_def_stmt
, *prev_stmt
= NULL
,
1471 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1472 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1479 if (!vinfo_for_stmt (stmt
)
1480 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1483 new_def_stmt
= NULL
;
1484 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1485 &op0
, &op1
, &new_def_stmt
,
1494 /* STMT can be performed on a smaller type. Check its uses. */
1495 use_stmt
= vect_single_imm_use (stmt
);
1496 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1499 /* Create pattern statement for STMT. */
1500 vectype
= get_vectype_for_scalar_type (new_type
);
1504 /* We want to collect all the statements for which we create pattern
1505 statetments, except for the case when the last statement in the
1506 sequence doesn't have a corresponding pattern statement. In such
1507 case we associate the last pattern statement with the last statement
1508 in the sequence. Therefore, we only add the original statement to
1509 the list if we know that it is not the last. */
1511 stmts
->safe_push (prev_stmt
);
1513 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1515 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1516 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1517 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1519 if (dump_enabled_p ())
1521 dump_printf_loc (MSG_NOTE
, vect_location
,
1522 "created pattern stmt: ");
1523 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1524 dump_printf (MSG_NOTE
, "\n");
1527 type
= gimple_expr_type (stmt
);
1534 /* We got a sequence. We expect it to end with a type demotion operation.
1535 Otherwise, we quit (for now). There are three possible cases: the
1536 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1537 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1538 NEW_TYPE differs (we create a new conversion statement). */
1539 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1541 use_lhs
= gimple_assign_lhs (use_stmt
);
1542 use_type
= TREE_TYPE (use_lhs
);
1543 /* Support only type demotion or signedess change. */
1544 if (!INTEGRAL_TYPE_P (use_type
)
1545 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1548 /* Check that NEW_TYPE is not bigger than the conversion result. */
1549 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1552 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1553 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1555 /* Create NEW_TYPE->USE_TYPE conversion. */
1556 new_oprnd
= make_ssa_name (use_type
);
1557 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1558 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1560 *type_in
= get_vectype_for_scalar_type (new_type
);
1561 *type_out
= get_vectype_for_scalar_type (use_type
);
1563 /* We created a pattern statement for the last statement in the
1564 sequence, so we don't need to associate it with the pattern
1565 statement created for PREV_STMT. Therefore, we add PREV_STMT
1566 to the list in order to mark it later in vect_pattern_recog_1. */
1568 stmts
->safe_push (prev_stmt
);
1573 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1574 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1577 *type_out
= NULL_TREE
;
1580 stmts
->safe_push (use_stmt
);
1583 /* TODO: support general case, create a conversion to the correct type. */
1586 /* Pattern detected. */
1587 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_NOTE
, vect_location
,
1590 "vect_recog_over_widening_pattern: detected: ");
1591 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1592 dump_printf (MSG_NOTE
, "\n");
1595 return pattern_stmt
;
1598 /* Detect widening shift pattern:
1604 S2 a_T = (TYPE) a_t;
1605 S3 res_T = a_T << CONST;
1607 where type 'TYPE' is at least double the size of type 'type'.
1609 Also detect cases where the shift result is immediately converted
1610 to another type 'result_type' that is no larger in size than 'TYPE'.
1611 In those cases we perform a widen-shift that directly results in
1612 'result_type', to avoid a possible over-widening situation:
1616 result_type res_result;
1619 S2 a_T = (TYPE) a_t;
1620 S3 res_T = a_T << CONST;
1621 S4 res_result = (result_type) res_T;
1622 '--> res_result' = a_t w<< CONST;
1624 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1625 create an additional pattern stmt for S2 to create a variable of an
1626 intermediate type, and perform widen-shift on the intermediate type:
1630 TYPE a_T, res_T, res_T';
1633 S2 a_T = (TYPE) a_t;
1634 '--> a_it = (interm_type) a_t;
1635 S3 res_T = a_T << CONST;
1636 '--> res_T' = a_it <<* CONST;
1640 * STMTS: Contains a stmt from which the pattern search begins.
1641 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1642 in STMTS. When an intermediate type is used and a pattern statement is
1643 created for S2, we also put S2 here (before S3).
1647 * TYPE_IN: The type of the input arguments to the pattern.
1649 * TYPE_OUT: The type of the output of this pattern.
1651 * Return value: A new stmt that will be used to replace the sequence of
1652 stmts that constitute the pattern. In this case it will be:
1653 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1656 vect_recog_widen_shift_pattern (vec
<gimple
*> *stmts
,
1657 tree
*type_in
, tree
*type_out
)
1659 gimple
*last_stmt
= stmts
->pop ();
1661 tree oprnd0
, oprnd1
;
1662 tree type
, half_type0
;
1663 gimple
*pattern_stmt
;
1664 tree vectype
, vectype_out
= NULL_TREE
;
1666 enum tree_code dummy_code
;
1668 vec
<tree
> dummy_vec
;
1672 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1675 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1678 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1681 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1682 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1683 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1686 /* Check operand 0: it has to be defined by a type promotion. */
1687 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1692 /* Check operand 1: has to be positive. We check that it fits the type
1693 in vect_handle_widen_op_by_const (). */
1694 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1697 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1698 type
= gimple_expr_type (last_stmt
);
1700 /* Check for subsequent conversion to another type. */
1701 use_stmt
= vect_single_imm_use (last_stmt
);
1702 if (use_stmt
&& is_gimple_assign (use_stmt
)
1703 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1704 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1706 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1707 tree use_type
= TREE_TYPE (use_lhs
);
1709 if (INTEGRAL_TYPE_P (use_type
)
1710 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1712 last_stmt
= use_stmt
;
1717 /* Check if this a widening operation. */
1718 gimple
*wstmt
= NULL
;
1719 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1721 type
, &half_type0
, def_stmt0
))
1724 /* Pattern detected. */
1725 if (dump_enabled_p ())
1726 dump_printf_loc (MSG_NOTE
, vect_location
,
1727 "vect_recog_widen_shift_pattern: detected:\n");
1729 /* Check target support. */
1730 vectype
= get_vectype_for_scalar_type (half_type0
);
1731 vectype_out
= get_vectype_for_scalar_type (type
);
1735 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1736 vectype_out
, vectype
,
1737 &dummy_code
, &dummy_code
,
1738 &dummy_int
, &dummy_vec
))
1742 *type_out
= vectype_out
;
1744 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1745 var
= vect_recog_temp_ssa_var (type
, NULL
);
1747 gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1750 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1751 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1752 stmt_vec_info new_stmt_info
1753 = new_stmt_vec_info (wstmt
, stmt_vinfo
->vinfo
);
1754 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1755 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1758 if (dump_enabled_p ())
1759 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1761 stmts
->safe_push (last_stmt
);
1762 return pattern_stmt
;
1765 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1769 S0 a_t = b_t r<< c_t;
1773 * STMTS: Contains a stmt from which the pattern search begins,
1774 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1778 S2 e_t = d_t & (B - 1);
1779 S3 f_t = b_t << c_t;
1780 S4 g_t = b_t >> e_t;
1783 where B is element bitsize of type.
1787 * TYPE_IN: The type of the input arguments to the pattern.
1789 * TYPE_OUT: The type of the output of this pattern.
1791 * Return value: A new stmt that will be used to replace the rotate
1795 vect_recog_rotate_pattern (vec
<gimple
*> *stmts
, tree
*type_in
, tree
*type_out
)
1797 gimple
*last_stmt
= stmts
->pop ();
1798 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1799 gimple
*pattern_stmt
, *def_stmt
;
1800 enum tree_code rhs_code
;
1801 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1802 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1803 enum vect_def_type dt
;
1804 optab optab1
, optab2
;
1805 edge ext_def
= NULL
;
1807 if (!is_gimple_assign (last_stmt
))
1810 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1820 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1823 lhs
= gimple_assign_lhs (last_stmt
);
1824 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1825 type
= TREE_TYPE (oprnd0
);
1826 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1827 if (TREE_CODE (oprnd0
) != SSA_NAME
1828 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1829 || !INTEGRAL_TYPE_P (type
)
1830 || !TYPE_UNSIGNED (type
))
1833 if (!vect_is_simple_use (oprnd1
, last_stmt
, vinfo
, &def_stmt
, &def
, &dt
))
1836 if (dt
!= vect_internal_def
1837 && dt
!= vect_constant_def
1838 && dt
!= vect_external_def
)
1841 vectype
= get_vectype_for_scalar_type (type
);
1842 if (vectype
== NULL_TREE
)
1845 /* If vector/vector or vector/scalar rotate is supported by the target,
1846 don't do anything here. */
1847 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1849 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1852 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1854 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1856 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1860 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1861 don't do anything here either. */
1862 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1863 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1865 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1867 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1869 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
1871 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1872 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1874 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1876 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1881 *type_out
= vectype
;
1882 if (*type_in
== NULL_TREE
)
1885 if (dt
== vect_external_def
1886 && TREE_CODE (oprnd1
) == SSA_NAME
1887 && is_a
<loop_vec_info
> (vinfo
))
1889 struct loop
*loop
= as_a
<loop_vec_info
> (vinfo
)->loop
;
1890 ext_def
= loop_preheader_edge (loop
);
1891 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1893 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1895 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1901 if (TREE_CODE (oprnd1
) == INTEGER_CST
1902 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1904 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1906 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1907 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1908 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1909 == TYPE_PRECISION (type
))
1913 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1914 if (def
== NULL_TREE
)
1916 def
= vect_recog_temp_ssa_var (type
, NULL
);
1917 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1921 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1922 gcc_assert (!new_bb
);
1925 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1927 stype
= TREE_TYPE (def
);
1929 if (TREE_CODE (def
) == INTEGER_CST
)
1931 if (!tree_fits_uhwi_p (def
)
1932 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1933 || integer_zerop (def
))
1935 def2
= build_int_cst (stype
,
1936 GET_MODE_PRECISION (TYPE_MODE (type
))
1937 - tree_to_uhwi (def
));
1941 tree vecstype
= get_vectype_for_scalar_type (stype
);
1942 stmt_vec_info def_stmt_vinfo
;
1944 if (vecstype
== NULL_TREE
)
1946 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1947 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1951 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1952 gcc_assert (!new_bb
);
1956 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1957 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1958 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1959 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1962 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1964 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1965 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1966 gimple_assign_lhs (def_stmt
), mask
);
1970 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1971 gcc_assert (!new_bb
);
1975 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1976 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1977 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1978 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1982 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1983 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
1984 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
1986 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1988 var2
= vect_recog_temp_ssa_var (type
, NULL
);
1989 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
1990 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
1992 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1994 /* Pattern detected. */
1995 if (dump_enabled_p ())
1996 dump_printf_loc (MSG_NOTE
, vect_location
,
1997 "vect_recog_rotate_pattern: detected:\n");
1999 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2000 var
= vect_recog_temp_ssa_var (type
, NULL
);
2001 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2003 if (dump_enabled_p ())
2004 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2006 stmts
->safe_push (last_stmt
);
2007 return pattern_stmt
;
2010 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2018 S3 res_T = b_T op a_t;
2020 where type 'TYPE' is a type with different size than 'type',
2021 and op is <<, >> or rotate.
2026 TYPE b_T, c_T, res_T;
2029 S1 a_t = (type) c_T;
2031 S3 res_T = b_T op a_t;
2035 * STMTS: Contains a stmt from which the pattern search begins,
2036 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2037 with a shift/rotate which has same type on both operands, in the
2038 second case just b_T op c_T, in the first case with added cast
2039 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2043 * TYPE_IN: The type of the input arguments to the pattern.
2045 * TYPE_OUT: The type of the output of this pattern.
2047 * Return value: A new stmt that will be used to replace the shift/rotate
2051 vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *stmts
,
2052 tree
*type_in
, tree
*type_out
)
2054 gimple
*last_stmt
= stmts
->pop ();
2055 tree oprnd0
, oprnd1
, lhs
, var
;
2056 gimple
*pattern_stmt
, *def_stmt
;
2057 enum tree_code rhs_code
;
2058 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2059 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2060 enum vect_def_type dt
;
2063 if (!is_gimple_assign (last_stmt
))
2066 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2078 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2081 lhs
= gimple_assign_lhs (last_stmt
);
2082 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2083 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2084 if (TREE_CODE (oprnd0
) != SSA_NAME
2085 || TREE_CODE (oprnd1
) != SSA_NAME
2086 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2087 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
2088 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
2089 || TYPE_PRECISION (TREE_TYPE (lhs
))
2090 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2093 if (!vect_is_simple_use (oprnd1
, last_stmt
, vinfo
, &def_stmt
,
2097 if (dt
!= vect_internal_def
)
2100 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2101 *type_out
= *type_in
;
2102 if (*type_in
== NULL_TREE
)
2106 if (gimple_assign_cast_p (def_stmt
))
2108 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2109 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2110 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2111 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2115 if (def
== NULL_TREE
)
2117 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2118 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2119 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2122 /* Pattern detected. */
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_NOTE
, vect_location
,
2125 "vect_recog_vector_vector_shift_pattern: detected:\n");
2127 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2128 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2129 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2131 if (dump_enabled_p ())
2132 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2134 stmts
->safe_push (last_stmt
);
2135 return pattern_stmt
;
2138 /* Detect multiplication by constant which are postive or negatives of power 2,
2139 and convert them to shift patterns.
2141 Mult with constants that are postive power of two.
2148 Mult with constants that are negative power of two.
2153 STMTS: Contains a stmt from which the pattern search begins,
2154 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2155 constant operand is a power of 2.
2157 S1': b_t = a_t << log2 (n)
2159 Convert the mult operation to LSHIFT and followed by a NEGATE
2160 if constant operand is a negative power of 2.
2161 type a_t, b_t, res_T;
2162 S2': b_t = a_t << log2 (n)
2163 S3': res_T = - (b_t)
2167 * TYPE_IN: The type of the input arguments to the pattern.
2169 * TYPE_OUT: The type of the output of this pattern.
2171 * Return value: A new stmt that will be used to replace the multiplication
2175 vect_recog_mult_pattern (vec
<gimple
*> *stmts
,
2176 tree
*type_in
, tree
*type_out
)
2178 gimple
*last_stmt
= stmts
->pop ();
2179 tree oprnd0
, oprnd1
, vectype
, itype
;
2180 gimple
*pattern_stmt
, *def_stmt
;
2182 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2183 int power2_val
, power2_neg_val
;
2186 if (!is_gimple_assign (last_stmt
))
2189 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2192 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2193 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2194 itype
= TREE_TYPE (oprnd0
);
2196 if (TREE_CODE (oprnd0
) != SSA_NAME
2197 || TREE_CODE (oprnd1
) != INTEGER_CST
2198 || !INTEGRAL_TYPE_P (itype
)
2199 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2202 vectype
= get_vectype_for_scalar_type (itype
);
2203 if (vectype
== NULL_TREE
)
2206 /* If the target can handle vectorized multiplication natively,
2207 don't attempt to optimize this. */
2208 optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2209 if (optab
!= unknown_optab
)
2211 machine_mode vec_mode
= TYPE_MODE (vectype
);
2212 int icode
= (int) optab_handler (optab
, vec_mode
);
2213 if (icode
!= CODE_FOR_nothing
)
2217 /* If target cannot handle vector left shift then we cannot
2218 optimize and bail out. */
2219 optab
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
2221 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2224 power2_val
= wi::exact_log2 (oprnd1
);
2225 power2_neg_val
= wi::exact_log2 (wi::neg (oprnd1
));
2227 /* Handle constant operands that are postive or negative powers of 2. */
2228 if (power2_val
!= -1)
2230 shift
= build_int_cst (itype
, power2_val
);
2232 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2233 LSHIFT_EXPR
, oprnd0
, shift
);
2235 else if (power2_neg_val
!= -1)
2237 /* If the target cannot handle vector NEGATE then we cannot
2238 do the optimization. */
2239 optab
= optab_for_tree_code (NEGATE_EXPR
, vectype
, optab_vector
);
2241 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2244 shift
= build_int_cst (itype
, power2_neg_val
);
2246 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2247 LSHIFT_EXPR
, oprnd0
, shift
);
2248 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2250 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2251 NEGATE_EXPR
, gimple_assign_lhs (def_stmt
));
2256 /* Pattern detected. */
2257 if (dump_enabled_p ())
2258 dump_printf_loc (MSG_NOTE
, vect_location
,
2259 "vect_recog_mult_pattern: detected:\n");
2261 if (dump_enabled_p ())
2262 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
,
2265 stmts
->safe_push (last_stmt
);
2267 *type_out
= vectype
;
2269 return pattern_stmt
;
2272 /* Detect a signed division by a constant that wouldn't be
2273 otherwise vectorized:
2279 where type 'type' is an integral type and N is a constant.
2281 Similarly handle modulo by a constant:
2287 * STMTS: Contains a stmt from which the pattern search begins,
2288 i.e. the division stmt. S1 is replaced by if N is a power
2289 of two constant and type is signed:
2290 S3 y_t = b_t < 0 ? N - 1 : 0;
2292 S1' a_t = x_t >> log2 (N);
2294 S4 is replaced if N is a power of two constant and
2295 type is signed by (where *_T temporaries have unsigned type):
2296 S9 y_T = b_t < 0 ? -1U : 0U;
2297 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2298 S7 z_t = (type) z_T;
2300 S5 x_t = w_t & (N - 1);
2301 S4' a_t = x_t - z_t;
2305 * TYPE_IN: The type of the input arguments to the pattern.
2307 * TYPE_OUT: The type of the output of this pattern.
2309 * Return value: A new stmt that will be used to replace the division
2310 S1 or modulo S4 stmt. */
2313 vect_recog_divmod_pattern (vec
<gimple
*> *stmts
,
2314 tree
*type_in
, tree
*type_out
)
2316 gimple
*last_stmt
= stmts
->pop ();
2317 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2318 gimple
*pattern_stmt
, *def_stmt
;
2319 enum tree_code rhs_code
;
2320 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2321 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2324 int dummy_int
, prec
;
2325 stmt_vec_info def_stmt_vinfo
;
2327 if (!is_gimple_assign (last_stmt
))
2330 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2333 case TRUNC_DIV_EXPR
:
2334 case TRUNC_MOD_EXPR
:
2340 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2343 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2344 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2345 itype
= TREE_TYPE (oprnd0
);
2346 if (TREE_CODE (oprnd0
) != SSA_NAME
2347 || TREE_CODE (oprnd1
) != INTEGER_CST
2348 || TREE_CODE (itype
) != INTEGER_TYPE
2349 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2352 vectype
= get_vectype_for_scalar_type (itype
);
2353 if (vectype
== NULL_TREE
)
2356 /* If the target can handle vectorized division or modulo natively,
2357 don't attempt to optimize this. */
2358 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2359 if (optab
!= unknown_optab
)
2361 machine_mode vec_mode
= TYPE_MODE (vectype
);
2362 int icode
= (int) optab_handler (optab
, vec_mode
);
2363 if (icode
!= CODE_FOR_nothing
)
2367 prec
= TYPE_PRECISION (itype
);
2368 if (integer_pow2p (oprnd1
))
2370 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2373 /* Pattern detected. */
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_NOTE
, vect_location
,
2376 "vect_recog_divmod_pattern: detected:\n");
2378 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2379 build_int_cst (itype
, 0));
2380 if (rhs_code
== TRUNC_DIV_EXPR
)
2382 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2385 = gimple_build_assign (var
, COND_EXPR
, cond
,
2386 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2387 build_int_cst (itype
, 1)),
2388 build_int_cst (itype
, 0));
2389 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2390 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2392 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2393 gimple_assign_lhs (def_stmt
));
2394 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2396 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2398 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2399 RSHIFT_EXPR
, var
, shift
);
2404 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2405 if (compare_tree_int (oprnd1
, 2) == 0)
2407 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2408 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2409 build_int_cst (itype
, 1),
2410 build_int_cst (itype
, 0));
2411 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2416 = build_nonstandard_integer_type (prec
, 1);
2417 tree vecutype
= get_vectype_for_scalar_type (utype
);
2419 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
2420 - tree_log2 (oprnd1
));
2421 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2423 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2424 build_int_cst (utype
, -1),
2425 build_int_cst (utype
, 0));
2426 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2427 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2428 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2429 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2430 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2431 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2432 gimple_assign_lhs (def_stmt
),
2434 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2435 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2436 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2437 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2438 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2440 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2441 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2444 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2445 PLUS_EXPR
, oprnd0
, signmask
);
2446 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2448 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2449 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2450 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2451 build_int_cst (itype
, 1)));
2452 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2455 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2456 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2460 if (dump_enabled_p ())
2461 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2464 stmts
->safe_push (last_stmt
);
2467 *type_out
= vectype
;
2468 return pattern_stmt
;
2471 if (prec
> HOST_BITS_PER_WIDE_INT
2472 || integer_zerop (oprnd1
))
2475 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2478 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2480 if (TYPE_UNSIGNED (itype
))
2482 unsigned HOST_WIDE_INT mh
, ml
;
2483 int pre_shift
, post_shift
;
2484 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2485 & GET_MODE_MASK (TYPE_MODE (itype
)));
2486 tree t1
, t2
, t3
, t4
;
2488 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2489 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2492 /* Find a suitable multiplier and right shift count
2493 instead of multiplying with D. */
2494 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2496 /* If the suggested multiplier is more than SIZE bits, we can do better
2497 for even divisors, using an initial right shift. */
2498 if (mh
!= 0 && (d
& 1) == 0)
2500 pre_shift
= floor_log2 (d
& -d
);
2501 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2502 &ml
, &post_shift
, &dummy_int
);
2510 if (post_shift
- 1 >= prec
)
2513 /* t1 = oprnd0 h* ml;
2517 q = t4 >> (post_shift - 1); */
2518 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2519 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2520 build_int_cst (itype
, ml
));
2521 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2523 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2525 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2526 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2528 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2530 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2531 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2533 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2535 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2537 if (post_shift
!= 1)
2539 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2541 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2543 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2544 build_int_cst (itype
, post_shift
- 1));
2549 pattern_stmt
= def_stmt
;
2554 if (pre_shift
>= prec
|| post_shift
>= prec
)
2557 /* t1 = oprnd0 >> pre_shift;
2559 q = t2 >> post_shift; */
2562 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2564 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2565 build_int_cst (NULL
, pre_shift
));
2566 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2571 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2572 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2573 build_int_cst (itype
, ml
));
2577 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2579 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2581 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2582 build_int_cst (itype
, post_shift
));
2587 pattern_stmt
= def_stmt
;
2592 unsigned HOST_WIDE_INT ml
;
2594 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2595 unsigned HOST_WIDE_INT abs_d
;
2597 tree t1
, t2
, t3
, t4
;
2599 /* Give up for -1. */
2603 /* Since d might be INT_MIN, we have to cast to
2604 unsigned HOST_WIDE_INT before negating to avoid
2605 undefined signed overflow. */
2607 ? (unsigned HOST_WIDE_INT
) d
2608 : - (unsigned HOST_WIDE_INT
) d
);
2610 /* n rem d = n rem -d */
2611 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2614 oprnd1
= build_int_cst (itype
, abs_d
);
2616 else if (HOST_BITS_PER_WIDE_INT
>= prec
2617 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2618 /* This case is not handled correctly below. */
2621 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2622 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2625 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
2627 if (post_shift
>= prec
)
2630 /* t1 = oprnd0 h* ml; */
2631 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2632 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2633 build_int_cst (itype
, ml
));
2637 /* t2 = t1 + oprnd0; */
2638 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2639 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2640 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2647 /* t3 = t2 >> post_shift; */
2648 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2649 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2650 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2651 build_int_cst (itype
, post_shift
));
2656 wide_int oprnd0_min
, oprnd0_max
;
2658 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2660 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2662 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2666 if (msb
== 0 && d
>= 0)
2670 pattern_stmt
= def_stmt
;
2674 /* t4 = oprnd0 >> (prec - 1);
2675 or if we know from VRP that oprnd0 >= 0
2677 or if we know from VRP that oprnd0 < 0
2679 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2680 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2682 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2683 build_int_cst (itype
, msb
));
2685 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2686 build_int_cst (itype
, prec
- 1));
2687 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2689 /* q = t3 - t4; or q = t4 - t3; */
2690 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2691 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2696 if (rhs_code
== TRUNC_MOD_EXPR
)
2700 /* We divided. Now finish by:
2703 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2705 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2706 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2707 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2709 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2710 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2713 /* Pattern detected. */
2714 if (dump_enabled_p ())
2716 dump_printf_loc (MSG_NOTE
, vect_location
,
2717 "vect_recog_divmod_pattern: detected: ");
2718 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
2719 dump_printf (MSG_NOTE
, "\n");
2722 stmts
->safe_push (last_stmt
);
2725 *type_out
= vectype
;
2726 return pattern_stmt
;
2729 /* Function vect_recog_mixed_size_cond_pattern
2731 Try to find the following pattern:
2736 S1 a_T = x_t CMP y_t ? b_T : c_T;
2738 where type 'TYPE' is an integral type which has different size
2739 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2740 than 'type', the constants need to fit into an integer type
2741 with the same width as 'type') or results of conversion from 'type'.
2745 * LAST_STMT: A stmt from which the pattern search begins.
2749 * TYPE_IN: The type of the input arguments to the pattern.
2751 * TYPE_OUT: The type of the output of this pattern.
2753 * Return value: A new stmt that will be used to replace the pattern.
2754 Additionally a def_stmt is added.
2756 a_it = x_t CMP y_t ? b_it : c_it;
2757 a_T = (TYPE) a_it; */
2760 vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
2763 gimple
*last_stmt
= (*stmts
)[0];
2764 tree cond_expr
, then_clause
, else_clause
;
2765 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2766 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2767 gimple
*pattern_stmt
, *def_stmt
;
2768 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2769 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2770 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
2772 tree comp_scalar_type
;
2774 if (!is_gimple_assign (last_stmt
)
2775 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2776 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2779 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2780 then_clause
= gimple_assign_rhs2 (last_stmt
);
2781 else_clause
= gimple_assign_rhs3 (last_stmt
);
2783 if (!COMPARISON_CLASS_P (cond_expr
))
2786 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2787 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2788 if (comp_vectype
== NULL_TREE
)
2791 type
= gimple_expr_type (last_stmt
);
2792 if (types_compatible_p (type
, comp_scalar_type
)
2793 || ((TREE_CODE (then_clause
) != INTEGER_CST
2794 || TREE_CODE (else_clause
) != INTEGER_CST
)
2795 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2796 || !INTEGRAL_TYPE_P (type
))
2799 if ((TREE_CODE (then_clause
) != INTEGER_CST
2800 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2801 &def_stmt0
, &promotion
))
2802 || (TREE_CODE (else_clause
) != INTEGER_CST
2803 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2804 &def_stmt1
, &promotion
)))
2807 if (orig_type0
&& orig_type1
2808 && !types_compatible_p (orig_type0
, orig_type1
))
2813 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2815 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2821 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2823 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2828 HOST_WIDE_INT cmp_mode_size
2829 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
2831 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == cmp_mode_size
)
2834 vectype
= get_vectype_for_scalar_type (type
);
2835 if (vectype
== NULL_TREE
)
2838 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2841 if (itype
== NULL_TREE
)
2842 itype
= build_nonstandard_integer_type (cmp_mode_size
,
2843 TYPE_UNSIGNED (type
));
2845 if (itype
== NULL_TREE
2846 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != cmp_mode_size
)
2849 vecitype
= get_vectype_for_scalar_type (itype
);
2850 if (vecitype
== NULL_TREE
)
2853 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2856 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > cmp_mode_size
)
2858 if ((TREE_CODE (then_clause
) == INTEGER_CST
2859 && !int_fits_type_p (then_clause
, itype
))
2860 || (TREE_CODE (else_clause
) == INTEGER_CST
2861 && !int_fits_type_p (else_clause
, itype
)))
2865 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2866 COND_EXPR
, unshare_expr (cond_expr
),
2867 fold_convert (itype
, then_clause
),
2868 fold_convert (itype
, else_clause
));
2869 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2870 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
2872 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2873 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
2874 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2875 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2876 *type_in
= vecitype
;
2877 *type_out
= vectype
;
2879 if (dump_enabled_p ())
2880 dump_printf_loc (MSG_NOTE
, vect_location
,
2881 "vect_recog_mixed_size_cond_pattern: detected:\n");
2883 return pattern_stmt
;
2887 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2888 true if bool VAR can be optimized that way. */
2891 check_bool_pattern (tree var
, vec_info
*vinfo
)
2894 enum vect_def_type dt
;
2896 enum tree_code rhs_code
;
2898 if (!vect_is_simple_use (var
, NULL
, vinfo
, &def_stmt
, &def
,
2902 if (dt
!= vect_internal_def
)
2905 if (!is_gimple_assign (def_stmt
))
2908 if (!has_single_use (def
))
2911 rhs1
= gimple_assign_rhs1 (def_stmt
);
2912 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2916 return check_bool_pattern (rhs1
, vinfo
);
2919 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2920 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2921 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2923 return check_bool_pattern (rhs1
, vinfo
);
2926 return check_bool_pattern (rhs1
, vinfo
);
2931 if (!check_bool_pattern (rhs1
, vinfo
))
2933 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
);
2936 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2938 tree vecitype
, comp_vectype
;
2940 /* If the comparison can throw, then is_gimple_condexpr will be
2941 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2942 if (stmt_could_throw_p (def_stmt
))
2945 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2946 if (comp_vectype
== NULL_TREE
)
2949 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2951 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2954 vecitype
= get_vectype_for_scalar_type (itype
);
2955 if (vecitype
== NULL_TREE
)
2959 vecitype
= comp_vectype
;
2960 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2967 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2968 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2969 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2972 adjust_bool_pattern_cast (tree type
, tree var
)
2974 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2975 gimple
*cast_stmt
, *pattern_stmt
;
2977 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
2978 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2979 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2980 cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2981 NOP_EXPR
, gimple_assign_lhs (pattern_stmt
));
2982 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
2983 return gimple_assign_lhs (cast_stmt
);
2987 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2988 recursively. VAR is an SSA_NAME that should be transformed from bool
2989 to a wider integer type, OUT_TYPE is the desired final integer type of
2990 the whole pattern, TRUEVAL should be NULL unless optimizing
2991 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2992 in the then_clause, STMTS is where statements with added pattern stmts
2993 should be pushed to. */
2996 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2997 vec
<gimple
*> *stmts
)
2999 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3000 enum tree_code rhs_code
, def_rhs_code
;
3001 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3003 gimple
*pattern_stmt
, *def_stmt
;
3005 rhs1
= gimple_assign_rhs1 (stmt
);
3006 rhs2
= gimple_assign_rhs2 (stmt
);
3007 rhs_code
= gimple_assign_rhs_code (stmt
);
3008 loc
= gimple_location (stmt
);
3013 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3014 itype
= TREE_TYPE (irhs1
);
3016 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3021 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3022 itype
= TREE_TYPE (irhs1
);
3024 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3025 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3029 /* Try to optimize x = y & (a < b ? 1 : 0); into
3030 x = (a < b ? y : 0);
3036 S1 a_b = x1 CMP1 y1;
3037 S2 b_b = x2 CMP2 y2;
3039 S4 d_T = (TYPE) c_b;
3041 we would normally emit:
3043 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3044 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3045 S3' c_T = a_T & b_T;
3048 but we can save one stmt by using the
3049 result of one of the COND_EXPRs in the other COND_EXPR and leave
3050 BIT_AND_EXPR stmt out:
3052 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3053 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3056 At least when VEC_COND_EXPR is implemented using masks
3057 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3058 computes the comparison masks and ands it, in one case with
3059 all ones vector, in the other case with a vector register.
3060 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3061 often more expensive. */
3062 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3063 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3064 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3066 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3067 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3068 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3069 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
3072 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
3073 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
3074 tstmt
= stmts
->pop ();
3075 gcc_assert (tstmt
== def_stmt
);
3076 stmts
->quick_push (stmt
);
3077 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
3078 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
3079 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
3080 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
3084 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3087 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3088 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3089 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3091 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3092 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3093 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3094 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
3097 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
3098 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
3099 tstmt
= stmts
->pop ();
3100 gcc_assert (tstmt
== def_stmt
);
3101 stmts
->quick_push (stmt
);
3102 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
3103 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
3104 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
3105 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
3109 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3115 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3116 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3118 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3119 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3121 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3122 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3123 int out_prec
= TYPE_PRECISION (out_type
);
3124 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3125 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
3126 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3127 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
3130 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
3131 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
3134 itype
= TREE_TYPE (irhs1
);
3136 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3137 rhs_code
, irhs1
, irhs2
);
3141 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3142 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3143 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3144 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
3145 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3147 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
3149 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3152 itype
= TREE_TYPE (rhs1
);
3153 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3154 if (trueval
== NULL_TREE
)
3155 trueval
= build_int_cst (itype
, 1);
3157 gcc_checking_assert (useless_type_conversion_p (itype
,
3158 TREE_TYPE (trueval
)));
3160 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3161 COND_EXPR
, cond_expr
, trueval
,
3162 build_int_cst (itype
, 0));
3166 stmts
->safe_push (stmt
);
3167 gimple_set_location (pattern_stmt
, loc
);
3168 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
3169 return gimple_assign_lhs (pattern_stmt
);
3173 /* Function vect_recog_bool_pattern
3175 Try to find pattern like following:
3177 bool a_b, b_b, c_b, d_b, e_b;
3180 S1 a_b = x1 CMP1 y1;
3181 S2 b_b = x2 CMP2 y2;
3183 S4 d_b = x3 CMP3 y3;
3185 S6 f_T = (TYPE) e_b;
3187 where type 'TYPE' is an integral type. Or a similar pattern
3190 S6 f_Y = e_b ? r_Y : s_Y;
3192 as results from if-conversion of a complex condition.
3196 * LAST_STMT: A stmt at the end from which the pattern
3197 search begins, i.e. cast of a bool to
3202 * TYPE_IN: The type of the input arguments to the pattern.
3204 * TYPE_OUT: The type of the output of this pattern.
3206 * Return value: A new stmt that will be used to replace the pattern.
3208 Assuming size of TYPE is the same as size of all comparisons
3209 (otherwise some casts would be added where needed), the above
3210 sequence we create related pattern stmts:
3211 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3212 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3213 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3214 S5' e_T = c_T | d_T;
3217 Instead of the above S3' we could emit:
3218 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3219 S3' c_T = a_T | b_T;
3220 but the above is more efficient. */
3223 vect_recog_bool_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
3226 gimple
*last_stmt
= stmts
->pop ();
3227 enum tree_code rhs_code
;
3228 tree var
, lhs
, rhs
, vectype
;
3229 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3230 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3231 gimple
*pattern_stmt
;
3233 if (!is_gimple_assign (last_stmt
))
3236 var
= gimple_assign_rhs1 (last_stmt
);
3237 lhs
= gimple_assign_lhs (last_stmt
);
3239 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
3240 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
3241 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
3244 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3245 if (CONVERT_EXPR_CODE_P (rhs_code
))
3247 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
3248 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3250 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3251 if (vectype
== NULL_TREE
)
3254 if (!check_bool_pattern (var
, vinfo
))
3257 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
3258 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3259 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3260 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3263 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3264 *type_out
= vectype
;
3266 stmts
->safe_push (last_stmt
);
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_NOTE
, vect_location
,
3269 "vect_recog_bool_pattern: detected:\n");
3271 return pattern_stmt
;
3273 else if (rhs_code
== COND_EXPR
3274 && TREE_CODE (var
) == SSA_NAME
)
3276 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3277 if (vectype
== NULL_TREE
)
3280 /* Build a scalar type for the boolean result that when
3281 vectorized matches the vector type of the result in
3282 size and number of elements. */
3284 = wi::udiv_trunc (TYPE_SIZE (vectype
),
3285 TYPE_VECTOR_SUBPARTS (vectype
)).to_uhwi ();
3287 = build_nonstandard_integer_type (prec
,
3288 TYPE_UNSIGNED (TREE_TYPE (var
)));
3289 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3292 if (!check_bool_pattern (var
, vinfo
))
3295 rhs
= adjust_bool_pattern (var
, type
, NULL_TREE
, stmts
);
3296 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3298 = gimple_build_assign (lhs
, COND_EXPR
,
3299 build2 (NE_EXPR
, boolean_type_node
,
3300 rhs
, build_int_cst (type
, 0)),
3301 gimple_assign_rhs2 (last_stmt
),
3302 gimple_assign_rhs3 (last_stmt
));
3303 *type_out
= vectype
;
3305 stmts
->safe_push (last_stmt
);
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_NOTE
, vect_location
,
3308 "vect_recog_bool_pattern: detected:\n");
3310 return pattern_stmt
;
3312 else if (rhs_code
== SSA_NAME
3313 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3315 stmt_vec_info pattern_stmt_info
;
3316 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3317 gcc_assert (vectype
!= NULL_TREE
);
3318 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3320 if (!check_bool_pattern (var
, vinfo
))
3323 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
3324 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3325 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3327 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3328 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3329 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3332 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3333 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3334 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3335 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3336 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3337 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
3338 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
3339 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
3340 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
3341 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
3342 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
3343 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
3344 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
3345 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
3346 *type_out
= vectype
;
3348 stmts
->safe_push (last_stmt
);
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_NOTE
, vect_location
,
3351 "vect_recog_bool_pattern: detected:\n");
3352 return pattern_stmt
;
3359 /* Mark statements that are involved in a pattern. */
3362 vect_mark_pattern_stmts (gimple
*orig_stmt
, gimple
*pattern_stmt
,
3363 tree pattern_vectype
)
3365 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
3366 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
3367 vec_info
*vinfo
= orig_stmt_info
->vinfo
;
3370 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
3371 if (pattern_stmt_info
== NULL
)
3373 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3374 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3376 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
3378 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
3379 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
3380 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
3381 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
3382 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
3383 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
3384 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
3385 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
3386 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
3388 gimple_stmt_iterator si
;
3389 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
3390 !gsi_end_p (si
); gsi_next (&si
))
3392 def_stmt
= gsi_stmt (si
);
3393 def_stmt_info
= vinfo_for_stmt (def_stmt
);
3394 if (def_stmt_info
== NULL
)
3396 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
3397 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3399 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
3400 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
3401 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
3402 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
3403 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
3408 /* Function vect_pattern_recog_1
3411 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3412 computation pattern.
3413 STMT: A stmt from which the pattern search should start.
3415 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3416 expression that computes the same functionality and can be used to
3417 replace the sequence of stmts that are involved in the pattern.
3420 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3421 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3422 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3423 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3424 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3425 to the available target pattern.
3427 This function also does some bookkeeping, as explained in the documentation
3428 for vect_recog_pattern. */
3431 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func
,
3432 gimple_stmt_iterator si
,
3433 vec
<gimple
*> *stmts_to_replace
)
3435 gimple
*stmt
= gsi_stmt (si
), *pattern_stmt
;
3436 stmt_vec_info stmt_info
;
3437 loop_vec_info loop_vinfo
;
3438 tree pattern_vectype
;
3439 tree type_in
, type_out
;
3440 enum tree_code code
;
3444 stmts_to_replace
->truncate (0);
3445 stmts_to_replace
->quick_push (stmt
);
3446 pattern_stmt
= (* vect_recog_func
) (stmts_to_replace
, &type_in
, &type_out
);
3450 stmt
= stmts_to_replace
->last ();
3451 stmt_info
= vinfo_for_stmt (stmt
);
3452 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3454 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
3456 /* No need to check target support (already checked by the pattern
3457 recognition function). */
3458 pattern_vectype
= type_out
? type_out
: type_in
;
3462 machine_mode vec_mode
;
3463 enum insn_code icode
;
3466 /* Check target support */
3467 type_in
= get_vectype_for_scalar_type (type_in
);
3471 type_out
= get_vectype_for_scalar_type (type_out
);
3476 pattern_vectype
= type_out
;
3478 if (is_gimple_assign (pattern_stmt
))
3479 code
= gimple_assign_rhs_code (pattern_stmt
);
3482 gcc_assert (is_gimple_call (pattern_stmt
));
3486 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
3487 vec_mode
= TYPE_MODE (type_in
);
3489 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
3490 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
3494 /* Found a vectorizable pattern. */
3495 if (dump_enabled_p ())
3497 dump_printf_loc (MSG_NOTE
, vect_location
,
3498 "pattern recognized: ");
3499 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3502 /* Mark the stmts that are involved in the pattern. */
3503 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
3505 /* Patterns cannot be vectorized using SLP, because they change the order of
3508 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
3510 LOOP_VINFO_REDUCTIONS (loop_vinfo
).ordered_remove (i
);
3512 /* It is possible that additional pattern stmts are created and inserted in
3513 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3514 relevant statements. */
3515 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
3516 && (unsigned) i
< (stmts_to_replace
->length () - 1);
3519 stmt_info
= vinfo_for_stmt (stmt
);
3520 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3521 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_NOTE
, vect_location
,
3524 "additional pattern stmt: ");
3525 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3528 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
3533 /* Function vect_pattern_recog
3536 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3539 Output - for each computation idiom that is detected we create a new stmt
3540 that provides the same functionality and that can be vectorized. We
3541 also record some information in the struct_stmt_info of the relevant
3542 stmts, as explained below:
3544 At the entry to this function we have the following stmts, with the
3545 following initial value in the STMT_VINFO fields:
3547 stmt in_pattern_p related_stmt vec_stmt
3548 S1: a_i = .... - - -
3549 S2: a_2 = ..use(a_i).. - - -
3550 S3: a_1 = ..use(a_2).. - - -
3551 S4: a_0 = ..use(a_1).. - - -
3552 S5: ... = ..use(a_0).. - - -
3554 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3555 represented by a single stmt. We then:
3556 - create a new stmt S6 equivalent to the pattern (the stmt is not
3557 inserted into the code)
3558 - fill in the STMT_VINFO fields as follows:
3560 in_pattern_p related_stmt vec_stmt
3561 S1: a_i = .... - - -
3562 S2: a_2 = ..use(a_i).. - - -
3563 S3: a_1 = ..use(a_2).. - - -
3564 S4: a_0 = ..use(a_1).. true S6 -
3565 '---> S6: a_new = .... - S4 -
3566 S5: ... = ..use(a_0).. - - -
3568 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3569 to each other through the RELATED_STMT field).
3571 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3572 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3573 remain irrelevant unless used by stmts other than S4.
3575 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3576 (because they are marked as irrelevant). It will vectorize S6, and record
3577 a pointer to the new vector stmt VS6 from S6 (as usual).
3578 S4 will be skipped, and S5 will be vectorized as usual:
3580 in_pattern_p related_stmt vec_stmt
3581 S1: a_i = .... - - -
3582 S2: a_2 = ..use(a_i).. - - -
3583 S3: a_1 = ..use(a_2).. - - -
3584 > VS6: va_new = .... - - -
3585 S4: a_0 = ..use(a_1).. true S6 VS6
3586 '---> S6: a_new = .... - S4 VS6
3587 > VS5: ... = ..vuse(va_new).. - - -
3588 S5: ... = ..use(a_0).. - - -
3590 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3591 elsewhere), and we'll end up with:
3594 VS5: ... = ..vuse(va_new)..
3596 In case of more than one pattern statements, e.g., widen-mult with
3600 S2 a_T = (TYPE) a_t;
3601 '--> S3: a_it = (interm_type) a_t;
3602 S4 prod_T = a_T * CONST;
3603 '--> S5: prod_T' = a_it w* CONST;
3605 there may be other users of a_T outside the pattern. In that case S2 will
3606 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3607 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3608 be recorded in S3. */
3611 vect_pattern_recog (vec_info
*vinfo
)
3616 gimple_stmt_iterator si
;
3618 vect_recog_func_ptr vect_recog_func
;
3619 auto_vec
<gimple
*, 1> stmts_to_replace
;
3622 if (dump_enabled_p ())
3623 dump_printf_loc (MSG_NOTE
, vect_location
,
3624 "=== vect_pattern_recog ===\n");
3626 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
3628 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3629 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3630 nbbs
= loop
->num_nodes
;
3634 bbs
= &as_a
<bb_vec_info
> (vinfo
)->bb
;
3638 /* Scan through the loop stmts, applying the pattern recognition
3639 functions starting at each stmt visited: */
3640 for (i
= 0; i
< nbbs
; i
++)
3642 basic_block bb
= bbs
[i
];
3643 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3645 if (is_a
<bb_vec_info
> (vinfo
)
3646 && (stmt
= gsi_stmt (si
))
3647 && vinfo_for_stmt (stmt
)
3648 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
3651 /* Scan over all generic vect_recog_xxx_pattern functions. */
3652 for (j
= 0; j
< NUM_PATTERNS
; j
++)
3654 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
3655 vect_pattern_recog_1 (vect_recog_func
, si
,