1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
47 #include "omp-simd-clone.h"
50 /* Report that we've found an instance of pattern PATTERN in
54 vect_pattern_detected (const char *name
, gimple
*stmt
)
56 if (dump_enabled_p ())
58 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: ", name
);
59 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
64 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
66 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
71 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
73 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
74 append_pattern_def_seq (stmt_info
, stmt
);
77 /* Return true if the target supports a vector version of CODE,
78 where CODE is known to map to a direct optab. ITYPE specifies
79 the type of (some of) the scalar inputs and OTYPE specifies the
80 type of the scalar result.
82 If CODE allows the inputs and outputs to have different type
83 (such as for WIDEN_SUM_EXPR), it is the input mode rather
84 than the output mode that determines the appropriate target pattern.
85 Operand 0 of the target pattern then specifies the mode that the output
88 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
89 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
93 vect_supportable_direct_optab_p (tree otype
, tree_code code
,
94 tree itype
, tree
*vecotype_out
,
95 tree
*vecitype_out
= NULL
)
97 tree vecitype
= get_vectype_for_scalar_type (itype
);
101 tree vecotype
= get_vectype_for_scalar_type (otype
);
105 optab optab
= optab_for_tree_code (code
, vecitype
, optab_default
);
109 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
110 if (icode
== CODE_FOR_nothing
111 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
114 *vecotype_out
= vecotype
;
116 *vecitype_out
= vecitype
;
120 /* Check whether STMT2 is in the same loop or basic block as STMT1.
121 Which of the two applies depends on whether we're currently doing
122 loop-based or basic-block-based vectorization, as determined by
123 the vinfo_for_stmt for STMT1 (which must be defined).
125 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
126 to be defined as well. */
129 vect_same_loop_or_bb_p (gimple
*stmt1
, gimple
*stmt2
)
131 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
132 return vect_stmt_in_region_p (stmt_vinfo
->vinfo
, stmt2
);
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
139 vect_single_imm_use (gimple
*def_stmt
)
141 tree lhs
= gimple_assign_lhs (def_stmt
);
145 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
148 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
154 /* If OP is defined by a statement that's being considered for vectorization,
155 return information about that statement, otherwise return NULL. */
158 vect_get_internal_def (vec_info
*vinfo
, tree op
)
162 if (TREE_CODE (op
) != SSA_NAME
163 || !vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
)
164 || dt
!= vect_internal_def
)
167 return vinfo_for_stmt (def_stmt
);
170 /* Check whether NAME, an ssa-name used in USE_STMT,
171 is a result of a type promotion, such that:
172 DEF_STMT: NAME = NOP (name0)
173 If CHECK_SIGN is TRUE, check that either both types are signed or both are
177 type_conversion_p (tree name
, gimple
*use_stmt
, bool check_sign
,
178 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
180 gimple
*dummy_gimple
;
181 stmt_vec_info stmt_vinfo
;
182 tree type
= TREE_TYPE (name
);
184 enum vect_def_type dt
;
186 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
187 if (!vect_is_simple_use (name
, stmt_vinfo
->vinfo
, def_stmt
, &dt
))
190 if (dt
!= vect_internal_def
191 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
197 if (dt
== vect_internal_def
)
199 stmt_vec_info def_vinfo
= vinfo_for_stmt (*def_stmt
);
200 if (STMT_VINFO_IN_PATTERN_P (def_vinfo
))
204 if (!is_gimple_assign (*def_stmt
))
207 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
210 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
212 *orig_type
= TREE_TYPE (oprnd0
);
213 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
214 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
217 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
222 if (!vect_is_simple_use (oprnd0
, stmt_vinfo
->vinfo
, &dummy_gimple
, &dt
))
228 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
229 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
232 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
234 return make_temp_ssa_name (type
, stmt
, "patt");
237 /* Return true if STMT_VINFO describes a reduction for which reassociation
238 is allowed. If STMT_INFO is part of a group, assume that it's part of
239 a reduction chain and optimistically assume that all statements
240 except the last allow reassociation. */
243 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo
)
245 return (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
246 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo
) != FOLD_LEFT_REDUCTION
247 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo
) != NULL
);
250 /* Function vect_recog_dot_prod_pattern
252 Try to find the following pattern:
258 sum_0 = phi <init, sum_1>
261 S3 x_T = (TYPE1) x_t;
262 S4 y_T = (TYPE1) y_t;
264 [S6 prod = (TYPE2) prod; #optional]
265 S7 sum_1 = prod + sum_0;
267 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
268 same size of 'TYPE1' or bigger. This is a special case of a reduction
273 * STMTS: Contains a stmt from which the pattern search begins. In the
274 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
279 * TYPE_OUT: The type of the output of this pattern.
281 * Return value: A new stmt that will be used to replace the sequence of
282 stmts that constitute the pattern. In this case it will be:
283 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
285 Note: The dot-prod idiom is a widening reduction pattern that is
286 vectorized without preserving all the intermediate results. It
287 produces only N/2 (widened) results (by summing up pairs of
288 intermediate results) rather than all N results. Therefore, we
289 cannot allow this pattern when we want to get all the results and in
290 the correct order (as is the case when this computation is in an
291 inner-loop nested in an outer-loop that us being vectorized). */
294 vect_recog_dot_prod_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
296 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
298 tree oprnd00
, oprnd01
;
299 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
300 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
301 tree type
, half_type
;
302 gimple
*pattern_stmt
;
304 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
312 loop
= LOOP_VINFO_LOOP (loop_info
);
314 /* We don't allow changing the order of the computation in the inner-loop
315 when doing outer-loop vectorization. */
316 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
319 if (!is_gimple_assign (last_stmt
))
322 type
= gimple_expr_type (last_stmt
);
324 /* Look for the following pattern
328 DDPROD = (TYPE2) DPROD;
329 sum_1 = DDPROD + sum_0;
331 - DX is double the size of X
332 - DY is double the size of Y
333 - DX, DY, DPROD all have the same type
334 - sum is the same size of DPROD or bigger
335 - sum has been recognized as a reduction variable.
337 This is equivalent to:
338 DPROD = X w* Y; #widen mult
339 sum_1 = DPROD w+ sum_0; #widen summation
341 DPROD = X w* Y; #widen mult
342 sum_1 = DPROD + sum_0; #summation
345 /* Starting from LAST_STMT, follow the defs of its uses in search
346 of the above pattern. */
348 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
351 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
354 if (!vect_reassociating_reduction_p (stmt_vinfo
))
357 oprnd0
= gimple_assign_rhs1 (last_stmt
);
358 oprnd1
= gimple_assign_rhs2 (last_stmt
);
362 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
367 oprnd0
= gimple_assign_rhs1 (stmt
);
372 /* So far so good. Since last_stmt was detected as a (summation) reduction,
373 we know that oprnd1 is the reduction variable (defined by a loop-header
374 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
375 Left to check that oprnd0 is defined by a (widen_)mult_expr */
376 if (TREE_CODE (oprnd0
) != SSA_NAME
)
379 prod_type
= half_type
;
380 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
384 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
385 inside the loop (in case we are analyzing an outer-loop). */
386 gassign
*mult
= dyn_cast
<gassign
*> (mult_vinfo
->stmt
);
387 if (!mult
|| gimple_assign_rhs_code (mult
) != MULT_EXPR
)
389 if (STMT_VINFO_IN_PATTERN_P (mult_vinfo
))
391 /* Has been detected as a widening multiplication? */
393 mult
= dyn_cast
<gassign
*> (STMT_VINFO_RELATED_STMT (mult_vinfo
));
394 if (!mult
|| gimple_assign_rhs_code (mult
) != WIDEN_MULT_EXPR
)
396 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
)
397 = STMT_VINFO_PATTERN_DEF_SEQ (mult_vinfo
);
398 mult_vinfo
= vinfo_for_stmt (mult
);
399 gcc_assert (mult_vinfo
);
400 gcc_assert (STMT_VINFO_DEF_TYPE (mult_vinfo
) == vect_internal_def
);
401 oprnd00
= gimple_assign_rhs1 (mult
);
402 oprnd01
= gimple_assign_rhs2 (mult
);
406 tree half_type0
, half_type1
;
410 oprnd0
= gimple_assign_rhs1 (mult
);
411 oprnd1
= gimple_assign_rhs2 (mult
);
412 if (!type_conversion_p (oprnd0
, mult
, true, &half_type0
, &def_stmt
,
416 oprnd00
= gimple_assign_rhs1 (def_stmt
);
417 if (!type_conversion_p (oprnd1
, mult
, true, &half_type1
, &def_stmt
,
421 oprnd01
= gimple_assign_rhs1 (def_stmt
);
422 if (!types_compatible_p (half_type0
, half_type1
))
424 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
428 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
430 half_type
= TREE_TYPE (oprnd00
);
431 if (!vect_supportable_direct_optab_p (type
, DOT_PROD_EXPR
, half_type
,
435 var
= vect_recog_temp_ssa_var (type
, NULL
);
436 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
437 oprnd00
, oprnd01
, oprnd1
);
443 /* Function vect_recog_sad_pattern
445 Try to find the following Sum of Absolute Difference (SAD) pattern:
448 signed TYPE1 diff, abs_diff;
451 sum_0 = phi <init, sum_1>
454 S3 x_T = (TYPE1) x_t;
455 S4 y_T = (TYPE1) y_t;
457 S6 abs_diff = ABS_EXPR <diff>;
458 [S7 abs_diff = (TYPE2) abs_diff; #optional]
459 S8 sum_1 = abs_diff + sum_0;
461 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
462 same size of 'TYPE1' or bigger. This is a special case of a reduction
467 * STMTS: Contains a stmt from which the pattern search begins. In the
468 example, when this function is called with S8, the pattern
469 {S3,S4,S5,S6,S7,S8} will be detected.
473 * TYPE_OUT: The type of the output of this pattern.
475 * Return value: A new stmt that will be used to replace the sequence of
476 stmts that constitute the pattern. In this case it will be:
477 SAD_EXPR <x_t, y_t, sum_0>
481 vect_recog_sad_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
483 gimple
*last_stmt
= (*stmts
)[0];
484 tree sad_oprnd0
, sad_oprnd1
;
485 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
486 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
488 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
495 loop
= LOOP_VINFO_LOOP (loop_info
);
497 /* We don't allow changing the order of the computation in the inner-loop
498 when doing outer-loop vectorization. */
499 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
502 if (!is_gimple_assign (last_stmt
))
505 tree sum_type
= gimple_expr_type (last_stmt
);
507 /* Look for the following pattern
511 DAD = ABS_EXPR <DDIFF>;
512 DDPROD = (TYPE2) DPROD;
515 - DX is at least double the size of X
516 - DY is at least double the size of Y
517 - DX, DY, DDIFF, DAD all have the same type
518 - sum is the same size of DAD or bigger
519 - sum has been recognized as a reduction variable.
521 This is equivalent to:
522 DDIFF = X w- Y; #widen sub
523 DAD = ABS_EXPR <DDIFF>;
524 sum_1 = DAD w+ sum_0; #widen summation
526 DDIFF = X w- Y; #widen sub
527 DAD = ABS_EXPR <DDIFF>;
528 sum_1 = DAD + sum_0; #summation
531 /* Starting from LAST_STMT, follow the defs of its uses in search
532 of the above pattern. */
534 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
537 tree plus_oprnd0
, plus_oprnd1
;
539 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
542 if (!vect_reassociating_reduction_p (stmt_vinfo
))
545 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
546 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
548 /* The type conversion could be promotion, demotion,
549 or just signed -> unsigned. */
551 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
552 &half_type
, &def_stmt
, &promotion
))
553 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
555 half_type
= sum_type
;
557 /* So far so good. Since last_stmt was detected as a (summation) reduction,
558 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
559 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
560 Then check that plus_oprnd0 is defined by an abs_expr. */
562 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
565 tree abs_type
= half_type
;
566 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
570 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
571 inside the loop (in case we are analyzing an outer-loop). */
572 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
574 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
575 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
578 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
579 if (TYPE_UNSIGNED (abs_type
))
582 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
584 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
587 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
588 if (!diff_stmt_vinfo
)
591 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
592 inside the loop (in case we are analyzing an outer-loop). */
593 gassign
*diff_stmt
= dyn_cast
<gassign
*> (diff_stmt_vinfo
->stmt
);
594 if (!diff_stmt
|| gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
597 tree half_type0
, half_type1
;
599 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
600 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
602 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
603 &half_type0
, &def_stmt
, &promotion
)
606 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
608 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
609 &half_type1
, &def_stmt
, &promotion
)
612 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
614 if (!types_compatible_p (half_type0
, half_type1
))
616 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
617 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
620 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
622 if (!vect_supportable_direct_optab_p (sum_type
, SAD_EXPR
,
623 TREE_TYPE (sad_oprnd0
), type_out
))
626 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
627 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
628 sad_oprnd1
, plus_oprnd1
);
634 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
637 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
638 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
640 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
641 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
642 that satisfies the above restrictions, we can perform a widening opeartion
643 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
644 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
647 vect_handle_widen_op_by_const (gimple
*stmt
, enum tree_code code
,
648 tree const_oprnd
, tree
*oprnd
,
649 gimple
**wstmt
, tree type
,
650 tree
*half_type
, gimple
*def_stmt
)
652 tree new_type
, new_oprnd
;
654 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
657 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
658 || (code
== LSHIFT_EXPR
659 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
661 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
663 /* CONST_OPRND is a constant of HALF_TYPE. */
664 *oprnd
= gimple_assign_rhs1 (def_stmt
);
668 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
671 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
674 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
675 a type 2 times bigger than HALF_TYPE. */
676 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
677 TYPE_UNSIGNED (type
));
678 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
679 || (code
== LSHIFT_EXPR
680 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
683 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
684 *oprnd
= gimple_assign_rhs1 (def_stmt
);
685 new_oprnd
= make_ssa_name (new_type
);
686 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
689 *half_type
= new_type
;
694 /* Function vect_recog_widen_mult_pattern
696 Try to find the following pattern:
700 TYPE a_T, b_T, prod_T;
706 S5 prod_T = a_T * b_T;
708 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
710 Also detect unsigned cases:
714 unsigned TYPE u_prod_T;
715 TYPE a_T, b_T, prod_T;
721 S5 prod_T = a_T * b_T;
722 S6 u_prod_T = (unsigned TYPE) prod_T;
724 and multiplication by constants:
731 S5 prod_T = a_T * CONST;
733 A special case of multiplication by constants is when 'TYPE' is 4 times
734 bigger than 'type', but CONST fits an intermediate type 2 times smaller
735 than 'TYPE'. In that case we create an additional pattern stmt for S3
736 to create a variable of the intermediate type, and perform widen-mult
737 on the intermediate type as well:
741 TYPE a_T, prod_T, prod_T';
745 '--> a_it = (interm_type) a_t;
746 S5 prod_T = a_T * CONST;
747 '--> prod_T' = a_it w* CONST;
751 * STMTS: Contains a stmt from which the pattern search begins. In the
752 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
753 is detected. In case of unsigned widen-mult, the original stmt (S5) is
754 replaced with S6 in STMTS. In case of multiplication by a constant
755 of an intermediate type (the last case above), STMTS also contains S3
756 (inserted before S5).
760 * TYPE_OUT: The type of the output of this pattern.
762 * Return value: A new stmt that will be used to replace the sequence of
763 stmts that constitute the pattern. In this case it will be:
764 WIDEN_MULT <a_t, b_t>
765 If the result of WIDEN_MULT needs to be converted to a larger type, the
766 returned stmt will be this type conversion stmt.
770 vect_recog_widen_mult_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
772 gimple
*last_stmt
= stmts
->pop ();
773 gimple
*def_stmt0
, *def_stmt1
;
775 tree type
, half_type0
, half_type1
;
776 gimple
*new_stmt
= NULL
, *pattern_stmt
= NULL
;
777 tree vectype
, vecitype
;
779 enum tree_code dummy_code
;
785 if (!is_gimple_assign (last_stmt
))
788 type
= gimple_expr_type (last_stmt
);
790 /* Starting from LAST_STMT, follow the defs of its uses in search
791 of the above pattern. */
793 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
796 oprnd0
= gimple_assign_rhs1 (last_stmt
);
797 oprnd1
= gimple_assign_rhs2 (last_stmt
);
799 /* Check argument 0. */
800 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
804 /* Check argument 1. */
805 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
806 &def_stmt1
, &promotion
);
808 if (op1_ok
&& promotion
)
810 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
811 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
815 if (TREE_CODE (oprnd1
) == INTEGER_CST
816 && TREE_CODE (half_type0
) == INTEGER_TYPE
817 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
818 &oprnd0
, &new_stmt
, type
,
819 &half_type0
, def_stmt0
))
821 half_type1
= half_type0
;
822 oprnd1
= fold_convert (half_type1
, oprnd1
);
828 /* If the two arguments have different sizes, convert the one with
829 the smaller type into the larger type. */
830 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
832 /* If we already used up the single-stmt slot give up. */
837 gimple
*def_stmt
= NULL
;
839 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
841 def_stmt
= def_stmt0
;
842 half_type0
= half_type1
;
847 def_stmt
= def_stmt1
;
848 half_type1
= half_type0
;
852 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
853 tree new_oprnd
= make_ssa_name (half_type0
);
854 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
858 /* Handle unsigned case. Look for
859 S6 u_prod_T = (unsigned TYPE) prod_T;
860 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
861 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
867 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
870 use_stmt
= vect_single_imm_use (last_stmt
);
871 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
872 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
875 use_lhs
= gimple_assign_lhs (use_stmt
);
876 use_type
= TREE_TYPE (use_lhs
);
877 if (!INTEGRAL_TYPE_P (use_type
)
878 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
879 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
883 last_stmt
= use_stmt
;
886 if (!types_compatible_p (half_type0
, half_type1
))
889 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
890 to get an intermediate result of type ITYPE. In this case we need
891 to build a statement to convert this intermediate result to type TYPE. */
893 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
894 itype
= build_nonstandard_integer_type
895 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0
)) * 2,
896 TYPE_UNSIGNED (type
));
898 /* Pattern detected. */
899 vect_pattern_detected ("vect_recog_widen_mult_pattern", last_stmt
);
901 /* Check target support */
902 vectype
= get_vectype_for_scalar_type (half_type0
);
903 vecitype
= get_vectype_for_scalar_type (itype
);
906 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
908 &dummy_code
, &dummy_code
,
909 &dummy_int
, &dummy_vec
))
912 *type_out
= get_vectype_for_scalar_type (type
);
916 /* Pattern supported. Create a stmt to be used to replace the pattern: */
917 var
= vect_recog_temp_ssa_var (itype
, NULL
);
918 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
920 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
921 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
923 /* If the original two operands have different sizes, we may need to convert
924 the smaller one into the larget type. If this is the case, at this point
925 the new stmt is already built. */
928 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
929 stmt_vec_info new_stmt_info
930 = new_stmt_vec_info (new_stmt
, stmt_vinfo
->vinfo
);
931 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
932 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
935 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
936 the result of the widen-mult operation into type TYPE. */
939 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
940 stmt_vec_info pattern_stmt_info
941 = new_stmt_vec_info (pattern_stmt
, stmt_vinfo
->vinfo
);
942 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
943 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
944 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
946 gimple_assign_lhs (pattern_stmt
));
949 stmts
->safe_push (last_stmt
);
954 /* Function vect_recog_pow_pattern
956 Try to find the following pattern:
960 with POW being one of pow, powf, powi, powif and N being
965 * LAST_STMT: A stmt from which the pattern search begins.
969 * TYPE_OUT: The type of the output of this pattern.
971 * Return value: A new stmt that will be used to replace the sequence of
972 stmts that constitute the pattern. In this case it will be:
979 vect_recog_pow_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
981 gimple
*last_stmt
= (*stmts
)[0];
986 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
989 switch (gimple_call_combined_fn (last_stmt
))
999 base
= gimple_call_arg (last_stmt
, 0);
1000 exp
= gimple_call_arg (last_stmt
, 1);
1001 if (TREE_CODE (exp
) != REAL_CST
1002 && TREE_CODE (exp
) != INTEGER_CST
)
1004 if (flag_unsafe_math_optimizations
1005 && TREE_CODE (base
) == REAL_CST
1006 && !gimple_call_internal_p (last_stmt
))
1008 combined_fn log_cfn
;
1009 built_in_function exp_bfn
;
1010 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1013 log_cfn
= CFN_BUILT_IN_LOG
;
1014 exp_bfn
= BUILT_IN_EXP
;
1017 log_cfn
= CFN_BUILT_IN_LOGF
;
1018 exp_bfn
= BUILT_IN_EXPF
;
1021 log_cfn
= CFN_BUILT_IN_LOGL
;
1022 exp_bfn
= BUILT_IN_EXPL
;
1027 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1028 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1029 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1030 does that, but if C is a power of 2, we want to use
1031 exp2 (log2 (C) * x) in the non-vectorized version, but for
1032 vectorization we don't have vectorized exp2. */
1034 && TREE_CODE (logc
) == REAL_CST
1036 && lookup_attribute ("omp declare simd",
1037 DECL_ATTRIBUTES (exp_decl
)))
1039 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
1040 if (node
->simd_clones
== NULL
)
1042 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1043 || node
->definition
)
1045 expand_simd_clones (node
);
1046 if (node
->simd_clones
== NULL
)
1049 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1052 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1053 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1054 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1055 new_pattern_def_seq (stmt_vinfo
, g
);
1056 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1057 g
= gimple_build_call (exp_decl
, 1, def
);
1058 gimple_call_set_lhs (g
, res
);
1066 /* We now have a pow or powi builtin function call with a constant
1069 /* Catch squaring. */
1070 if ((tree_fits_shwi_p (exp
)
1071 && tree_to_shwi (exp
) == 2)
1072 || (TREE_CODE (exp
) == REAL_CST
1073 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1075 if (!vect_supportable_direct_optab_p (TREE_TYPE (base
), MULT_EXPR
,
1076 TREE_TYPE (base
), type_out
))
1079 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1080 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1084 /* Catch square root. */
1085 if (TREE_CODE (exp
) == REAL_CST
1086 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1088 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1090 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
1091 OPTIMIZE_FOR_SPEED
))
1093 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1094 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1095 gimple_call_set_lhs (stmt
, var
);
1096 gimple_call_set_nothrow (stmt
, true);
1105 /* Function vect_recog_widen_sum_pattern
1107 Try to find the following pattern:
1110 TYPE x_T, sum = init;
1112 sum_0 = phi <init, sum_1>
1114 S2 x_T = (TYPE) x_t;
1115 S3 sum_1 = x_T + sum_0;
1117 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1118 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1119 a special case of a reduction computation.
1123 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1124 when this function is called with S3, the pattern {S2,S3} will be detected.
1128 * TYPE_OUT: The type of the output of this pattern.
1130 * Return value: A new stmt that will be used to replace the sequence of
1131 stmts that constitute the pattern. In this case it will be:
1132 WIDEN_SUM <x_t, sum_0>
1134 Note: The widening-sum idiom is a widening reduction pattern that is
1135 vectorized without preserving all the intermediate results. It
1136 produces only N/2 (widened) results (by summing up pairs of
1137 intermediate results) rather than all N results. Therefore, we
1138 cannot allow this pattern when we want to get all the results and in
1139 the correct order (as is the case when this computation is in an
1140 inner-loop nested in an outer-loop that us being vectorized). */
1143 vect_recog_widen_sum_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1145 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
1146 tree oprnd0
, oprnd1
;
1147 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1148 tree type
, half_type
;
1149 gimple
*pattern_stmt
;
1150 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1158 loop
= LOOP_VINFO_LOOP (loop_info
);
1160 /* We don't allow changing the order of the computation in the inner-loop
1161 when doing outer-loop vectorization. */
1162 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1165 if (!is_gimple_assign (last_stmt
))
1168 type
= gimple_expr_type (last_stmt
);
1170 /* Look for the following pattern
1173 In which DX is at least double the size of X, and sum_1 has been
1174 recognized as a reduction variable.
1177 /* Starting from LAST_STMT, follow the defs of its uses in search
1178 of the above pattern. */
1180 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1183 if (!vect_reassociating_reduction_p (stmt_vinfo
))
1186 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1187 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1189 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1190 we know that oprnd1 is the reduction variable (defined by a loop-header
1191 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1192 Left to check that oprnd0 is defined by a cast from type 'type' to type
1195 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1200 oprnd0
= gimple_assign_rhs1 (stmt
);
1202 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
1204 if (!vect_supportable_direct_optab_p (type
, WIDEN_SUM_EXPR
, half_type
,
1208 var
= vect_recog_temp_ssa_var (type
, NULL
);
1209 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1211 return pattern_stmt
;
1215 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1218 STMT - a statement to check.
1219 DEF - we support operations with two operands, one of which is constant.
1220 The other operand can be defined by a demotion operation, or by a
1221 previous statement in a sequence of over-promoted operations. In the
1222 later case DEF is used to replace that operand. (It is defined by a
1223 pattern statement we created for the previous statement in the
1227 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1228 NULL, it's the type of DEF.
1229 STMTS - additional pattern statements. If a pattern statement (type
1230 conversion) is created in this function, its original statement is
1234 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1235 operands to use in the new pattern statement for STMT (will be created
1236 in vect_recog_over_widening_pattern ()).
1237 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1238 statements for STMT: the first one is a type promotion and the second
1239 one is the operation itself. We return the type promotion statement
1240 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1241 the second pattern statement. */
1244 vect_operation_fits_smaller_type (gimple
*stmt
, tree def
, tree
*new_type
,
1245 tree
*op0
, tree
*op1
, gimple
**new_def_stmt
,
1246 vec
<gimple
*> *stmts
)
1248 enum tree_code code
;
1249 tree const_oprnd
, oprnd
;
1250 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1251 gimple
*def_stmt
, *new_stmt
;
1257 *new_def_stmt
= NULL
;
1259 if (!is_gimple_assign (stmt
))
1262 code
= gimple_assign_rhs_code (stmt
);
1263 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1264 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1267 oprnd
= gimple_assign_rhs1 (stmt
);
1268 const_oprnd
= gimple_assign_rhs2 (stmt
);
1269 type
= gimple_expr_type (stmt
);
1271 if (TREE_CODE (oprnd
) != SSA_NAME
1272 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1275 /* If oprnd has other uses besides that in stmt we cannot mark it
1276 as being part of a pattern only. */
1277 if (!has_single_use (oprnd
))
1280 /* If we are in the middle of a sequence, we use DEF from a previous
1281 statement. Otherwise, OPRND has to be a result of type promotion. */
1284 half_type
= *new_type
;
1290 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1293 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1297 /* Can we perform the operation on a smaller type? */
1303 if (!int_fits_type_p (const_oprnd
, half_type
))
1305 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1306 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1309 interm_type
= build_nonstandard_integer_type (
1310 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1311 if (!int_fits_type_p (const_oprnd
, interm_type
))
1318 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1319 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1322 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1323 (e.g., if the original value was char, the shift amount is at most 8
1324 if we want to use short). */
1325 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1328 interm_type
= build_nonstandard_integer_type (
1329 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1331 if (!vect_supportable_shift (code
, interm_type
))
1337 if (vect_supportable_shift (code
, half_type
))
1340 /* Try intermediate type - HALF_TYPE is not supported. */
1341 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1344 interm_type
= build_nonstandard_integer_type (
1345 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1347 if (!vect_supportable_shift (code
, interm_type
))
1356 /* There are four possible cases:
1357 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1358 the first statement in the sequence)
1359 a. The original, HALF_TYPE, is not enough - we replace the promotion
1360 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1361 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1363 2. OPRND is defined by a pattern statement we created.
1364 a. Its type is not sufficient for the operation, we create a new stmt:
1365 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1366 this statement in NEW_DEF_STMT, and it is later put in
1367 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1368 b. OPRND is good to use in the new statement. */
1373 /* Replace the original type conversion HALF_TYPE->TYPE with
1374 HALF_TYPE->INTERM_TYPE. */
1375 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1377 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1378 /* Check if the already created pattern stmt is what we need. */
1379 if (!is_gimple_assign (new_stmt
)
1380 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1381 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1384 stmts
->safe_push (def_stmt
);
1385 oprnd
= gimple_assign_lhs (new_stmt
);
1389 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1390 oprnd
= gimple_assign_rhs1 (def_stmt
);
1391 new_oprnd
= make_ssa_name (interm_type
);
1392 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1393 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1394 stmts
->safe_push (def_stmt
);
1400 /* Retrieve the operand before the type promotion. */
1401 oprnd
= gimple_assign_rhs1 (def_stmt
);
1408 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1409 new_oprnd
= make_ssa_name (interm_type
);
1410 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1412 *new_def_stmt
= new_stmt
;
1415 /* Otherwise, OPRND is already set. */
1419 *new_type
= interm_type
;
1421 *new_type
= half_type
;
1424 *op1
= fold_convert (*new_type
, const_oprnd
);
1430 /* Try to find a statement or a sequence of statements that can be performed
1434 TYPE x_T, res0_T, res1_T;
1437 S2 x_T = (TYPE) x_t;
1438 S3 res0_T = op (x_T, C0);
1439 S4 res1_T = op (res0_T, C1);
1440 S5 ... = () res1_T; - type demotion
1442 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1444 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1445 be 'type' or some intermediate type. For now, we expect S5 to be a type
1446 demotion operation. We also check that S3 and S4 have only one use. */
1449 vect_recog_over_widening_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1451 gimple
*stmt
= stmts
->pop ();
1452 gimple
*pattern_stmt
= NULL
, *new_def_stmt
, *prev_stmt
= NULL
,
1454 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1455 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1462 if (!vinfo_for_stmt (stmt
)
1463 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1466 new_def_stmt
= NULL
;
1467 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1468 &op0
, &op1
, &new_def_stmt
,
1477 /* STMT can be performed on a smaller type. Check its uses. */
1478 use_stmt
= vect_single_imm_use (stmt
);
1479 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1482 /* Create pattern statement for STMT. */
1483 vectype
= get_vectype_for_scalar_type (new_type
);
1487 /* We want to collect all the statements for which we create pattern
1488 statetments, except for the case when the last statement in the
1489 sequence doesn't have a corresponding pattern statement. In such
1490 case we associate the last pattern statement with the last statement
1491 in the sequence. Therefore, we only add the original statement to
1492 the list if we know that it is not the last. */
1494 stmts
->safe_push (prev_stmt
);
1496 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1498 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1499 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1500 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1502 if (dump_enabled_p ())
1504 dump_printf_loc (MSG_NOTE
, vect_location
,
1505 "created pattern stmt: ");
1506 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1509 type
= gimple_expr_type (stmt
);
1516 /* We got a sequence. We expect it to end with a type demotion operation.
1517 Otherwise, we quit (for now). There are three possible cases: the
1518 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1519 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1520 NEW_TYPE differs (we create a new conversion statement). */
1521 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1523 use_lhs
= gimple_assign_lhs (use_stmt
);
1524 use_type
= TREE_TYPE (use_lhs
);
1525 /* Support only type demotion or signedess change. */
1526 if (!INTEGRAL_TYPE_P (use_type
)
1527 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1530 /* Check that NEW_TYPE is not bigger than the conversion result. */
1531 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1534 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1535 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1537 *type_out
= get_vectype_for_scalar_type (use_type
);
1541 /* Create NEW_TYPE->USE_TYPE conversion. */
1542 new_oprnd
= make_ssa_name (use_type
);
1543 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1544 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1546 /* We created a pattern statement for the last statement in the
1547 sequence, so we don't need to associate it with the pattern
1548 statement created for PREV_STMT. Therefore, we add PREV_STMT
1549 to the list in order to mark it later in vect_pattern_recog_1. */
1551 stmts
->safe_push (prev_stmt
);
1556 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1557 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1559 *type_out
= vectype
;
1562 stmts
->safe_push (use_stmt
);
1565 /* TODO: support general case, create a conversion to the correct type. */
1568 /* Pattern detected. */
1569 vect_pattern_detected ("vect_recog_over_widening_pattern", stmts
->last ());
1571 return pattern_stmt
;
1574 /* Detect widening shift pattern:
1580 S2 a_T = (TYPE) a_t;
1581 S3 res_T = a_T << CONST;
1583 where type 'TYPE' is at least double the size of type 'type'.
1585 Also detect cases where the shift result is immediately converted
1586 to another type 'result_type' that is no larger in size than 'TYPE'.
1587 In those cases we perform a widen-shift that directly results in
1588 'result_type', to avoid a possible over-widening situation:
1592 result_type res_result;
1595 S2 a_T = (TYPE) a_t;
1596 S3 res_T = a_T << CONST;
1597 S4 res_result = (result_type) res_T;
1598 '--> res_result' = a_t w<< CONST;
1600 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1601 create an additional pattern stmt for S2 to create a variable of an
1602 intermediate type, and perform widen-shift on the intermediate type:
1606 TYPE a_T, res_T, res_T';
1609 S2 a_T = (TYPE) a_t;
1610 '--> a_it = (interm_type) a_t;
1611 S3 res_T = a_T << CONST;
1612 '--> res_T' = a_it <<* CONST;
1616 * STMTS: Contains a stmt from which the pattern search begins.
1617 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1618 in STMTS. When an intermediate type is used and a pattern statement is
1619 created for S2, we also put S2 here (before S3).
1623 * TYPE_OUT: The type of the output of this pattern.
1625 * Return value: A new stmt that will be used to replace the sequence of
1626 stmts that constitute the pattern. In this case it will be:
1627 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1630 vect_recog_widen_shift_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1632 gimple
*last_stmt
= stmts
->pop ();
1634 tree oprnd0
, oprnd1
;
1635 tree type
, half_type0
;
1636 gimple
*pattern_stmt
;
1637 tree vectype
, vectype_out
= NULL_TREE
;
1639 enum tree_code dummy_code
;
1641 vec
<tree
> dummy_vec
;
1645 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1648 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1651 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1654 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1655 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1656 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1659 /* Check operand 0: it has to be defined by a type promotion. */
1660 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1665 /* Check operand 1: has to be positive. We check that it fits the type
1666 in vect_handle_widen_op_by_const (). */
1667 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1670 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1671 type
= gimple_expr_type (last_stmt
);
1673 /* Check for subsequent conversion to another type. */
1674 use_stmt
= vect_single_imm_use (last_stmt
);
1675 if (use_stmt
&& is_gimple_assign (use_stmt
)
1676 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1677 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1679 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1680 tree use_type
= TREE_TYPE (use_lhs
);
1682 if (INTEGRAL_TYPE_P (use_type
)
1683 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1685 last_stmt
= use_stmt
;
1690 /* Check if this a widening operation. */
1691 gimple
*wstmt
= NULL
;
1692 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1694 type
, &half_type0
, def_stmt0
))
1697 /* Pattern detected. */
1698 vect_pattern_detected ("vect_recog_widen_shift_pattern", last_stmt
);
1700 /* Check target support. */
1701 vectype
= get_vectype_for_scalar_type (half_type0
);
1702 vectype_out
= get_vectype_for_scalar_type (type
);
1706 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1707 vectype_out
, vectype
,
1708 &dummy_code
, &dummy_code
,
1709 &dummy_int
, &dummy_vec
))
1712 *type_out
= vectype_out
;
1714 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1715 var
= vect_recog_temp_ssa_var (type
, NULL
);
1717 = gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1720 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1721 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1722 stmt_vec_info new_stmt_info
1723 = new_stmt_vec_info (wstmt
, stmt_vinfo
->vinfo
);
1724 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1725 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1728 stmts
->safe_push (last_stmt
);
1729 return pattern_stmt
;
1732 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1736 S0 a_t = b_t r<< c_t;
1740 * STMTS: Contains a stmt from which the pattern search begins,
1741 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1745 S2 e_t = d_t & (B - 1);
1746 S3 f_t = b_t << c_t;
1747 S4 g_t = b_t >> e_t;
1750 where B is element bitsize of type.
1754 * TYPE_OUT: The type of the output of this pattern.
1756 * Return value: A new stmt that will be used to replace the rotate
1760 vect_recog_rotate_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1762 gimple
*last_stmt
= stmts
->pop ();
1763 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1764 gimple
*pattern_stmt
, *def_stmt
;
1765 enum tree_code rhs_code
;
1766 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1767 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1768 enum vect_def_type dt
;
1769 optab optab1
, optab2
;
1770 edge ext_def
= NULL
;
1772 if (!is_gimple_assign (last_stmt
))
1775 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1785 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1788 lhs
= gimple_assign_lhs (last_stmt
);
1789 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1790 type
= TREE_TYPE (oprnd0
);
1791 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1792 if (TREE_CODE (oprnd0
) != SSA_NAME
1793 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1794 || !INTEGRAL_TYPE_P (type
)
1795 || !TYPE_UNSIGNED (type
))
1798 if (!vect_is_simple_use (oprnd1
, vinfo
, &def_stmt
, &dt
))
1801 if (dt
!= vect_internal_def
1802 && dt
!= vect_constant_def
1803 && dt
!= vect_external_def
)
1806 vectype
= get_vectype_for_scalar_type (type
);
1807 if (vectype
== NULL_TREE
)
1810 /* If vector/vector or vector/scalar rotate is supported by the target,
1811 don't do anything here. */
1812 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1814 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1817 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1819 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1821 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1825 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1826 don't do anything here either. */
1827 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1828 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1830 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1832 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1834 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
1836 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1837 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1839 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1841 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1845 *type_out
= vectype
;
1847 if (dt
== vect_external_def
1848 && TREE_CODE (oprnd1
) == SSA_NAME
1849 && is_a
<loop_vec_info
> (vinfo
))
1851 struct loop
*loop
= as_a
<loop_vec_info
> (vinfo
)->loop
;
1852 ext_def
= loop_preheader_edge (loop
);
1853 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1855 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1857 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1863 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
1864 if (TREE_CODE (oprnd1
) == INTEGER_CST
1865 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
1867 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1869 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1870 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
1871 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1872 == TYPE_PRECISION (type
))
1876 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1877 if (def
== NULL_TREE
)
1879 def
= vect_recog_temp_ssa_var (type
, NULL
);
1880 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1884 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1885 gcc_assert (!new_bb
);
1888 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1890 stype
= TREE_TYPE (def
);
1891 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
1893 if (TREE_CODE (def
) == INTEGER_CST
)
1895 if (!tree_fits_uhwi_p (def
)
1896 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
1897 || integer_zerop (def
))
1899 def2
= build_int_cst (stype
,
1900 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
1904 tree vecstype
= get_vectype_for_scalar_type (stype
);
1905 stmt_vec_info def_stmt_vinfo
;
1907 if (vecstype
== NULL_TREE
)
1909 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1910 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1914 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1915 gcc_assert (!new_bb
);
1919 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1920 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1921 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1922 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1925 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1926 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
1927 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1928 gimple_assign_lhs (def_stmt
), mask
);
1932 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1933 gcc_assert (!new_bb
);
1937 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1938 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1939 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1940 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1944 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1945 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
1946 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
1948 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1950 var2
= vect_recog_temp_ssa_var (type
, NULL
);
1951 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
1952 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
1954 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1956 /* Pattern detected. */
1957 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
1959 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1960 var
= vect_recog_temp_ssa_var (type
, NULL
);
1961 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
1963 stmts
->safe_push (last_stmt
);
1964 return pattern_stmt
;
1967 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1975 S3 res_T = b_T op a_t;
1977 where type 'TYPE' is a type with different size than 'type',
1978 and op is <<, >> or rotate.
1983 TYPE b_T, c_T, res_T;
1986 S1 a_t = (type) c_T;
1988 S3 res_T = b_T op a_t;
1992 * STMTS: Contains a stmt from which the pattern search begins,
1993 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1994 with a shift/rotate which has same type on both operands, in the
1995 second case just b_T op c_T, in the first case with added cast
1996 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2000 * TYPE_OUT: The type of the output of this pattern.
2002 * Return value: A new stmt that will be used to replace the shift/rotate
2006 vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
2008 gimple
*last_stmt
= stmts
->pop ();
2009 tree oprnd0
, oprnd1
, lhs
, var
;
2010 gimple
*pattern_stmt
;
2011 enum tree_code rhs_code
;
2012 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2013 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2015 if (!is_gimple_assign (last_stmt
))
2018 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2030 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2033 lhs
= gimple_assign_lhs (last_stmt
);
2034 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2035 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2036 if (TREE_CODE (oprnd0
) != SSA_NAME
2037 || TREE_CODE (oprnd1
) != SSA_NAME
2038 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2039 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2040 || TYPE_PRECISION (TREE_TYPE (lhs
))
2041 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2044 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2048 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2049 if (*type_out
== NULL_TREE
)
2052 tree def
= NULL_TREE
;
2053 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2054 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo
)
2056 && gimple_assign_cast_p (def_stmt
))
2058 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2059 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2060 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2061 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2063 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2064 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2069 = build_low_bits_mask (TREE_TYPE (rhs1
),
2070 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2071 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2072 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2073 stmt_vec_info new_stmt_info
2074 = new_stmt_vec_info (def_stmt
, vinfo
);
2075 set_vinfo_for_stmt (def_stmt
, new_stmt_info
);
2076 STMT_VINFO_VECTYPE (new_stmt_info
)
2077 = get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2078 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2083 if (def
== NULL_TREE
)
2085 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2086 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2087 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2090 /* Pattern detected. */
2091 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2093 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2094 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2095 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2097 stmts
->safe_push (last_stmt
);
2098 return pattern_stmt
;
2101 /* Return true iff the target has a vector optab implementing the operation
2102 CODE on type VECTYPE. */
2105 target_has_vecop_for_code (tree_code code
, tree vectype
)
2107 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2109 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
2112 /* Verify that the target has optabs of VECTYPE to perform all the steps
2113 needed by the multiplication-by-immediate synthesis algorithm described by
2114 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2115 present. Return true iff the target supports all the steps. */
2118 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2119 tree vectype
, bool synth_shift_p
)
2121 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
2124 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
2125 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
2127 if (var
== negate_variant
2128 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2131 /* If we must synthesize shifts with additions make sure that vector
2132 addition is available. */
2133 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
2136 for (int i
= 1; i
< alg
->ops
; i
++)
2144 case alg_add_factor
:
2145 if (!supports_vplus
)
2150 case alg_sub_factor
:
2151 if (!supports_vminus
)
2157 case alg_impossible
:
2167 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2168 putting the final result in DEST. Append all statements but the last into
2169 VINFO. Return the last statement. */
2172 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2173 stmt_vec_info vinfo
)
2176 tree itype
= TREE_TYPE (op
);
2178 gcc_assert (amnt
>= 0);
2179 for (i
= 0; i
< amnt
; i
++)
2181 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2184 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
2187 append_pattern_def_seq (vinfo
, stmt
);
2195 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2196 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2197 the process if necessary. Append the resulting assignment statements
2198 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2199 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2200 left shifts using additions. */
2203 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2204 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
2206 if (integer_zerop (op2
)
2207 && (code
== LSHIFT_EXPR
2208 || code
== PLUS_EXPR
))
2210 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
2215 tree itype
= TREE_TYPE (op1
);
2216 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
2218 if (code
== LSHIFT_EXPR
2221 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2223 append_pattern_def_seq (stmt_vinfo
, stmt
);
2227 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
2228 append_pattern_def_seq (stmt_vinfo
, stmt
);
2232 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2233 and simple arithmetic operations to be vectorized. Record the statements
2234 produced in STMT_VINFO and return the last statement in the sequence or
2235 NULL if it's not possible to synthesize such a multiplication.
2236 This function mirrors the behavior of expand_mult_const in expmed.c but
2237 works on tree-ssa form. */
2240 vect_synth_mult_by_constant (tree op
, tree val
,
2241 stmt_vec_info stmt_vinfo
)
2243 tree itype
= TREE_TYPE (op
);
2244 machine_mode mode
= TYPE_MODE (itype
);
2245 struct algorithm alg
;
2246 mult_variant variant
;
2247 if (!tree_fits_shwi_p (val
))
2250 /* Multiplication synthesis by shifts, adds and subs can introduce
2251 signed overflow where the original operation didn't. Perform the
2252 operations on an unsigned type and cast back to avoid this.
2253 In the future we may want to relax this for synthesis algorithms
2254 that we can prove do not cause unexpected overflow. */
2255 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2257 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2259 /* Targets that don't support vector shifts but support vector additions
2260 can synthesize shifts that way. */
2261 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2263 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2264 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2265 The vectorizer's benefit analysis will decide whether it's beneficial
2267 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2268 &variant
, MAX_COST
);
2272 tree vectype
= get_vectype_for_scalar_type (multtype
);
2275 || !target_supports_mult_synth_alg (&alg
, variant
,
2276 vectype
, synth_shift_p
))
2281 /* Clear out the sequence of statements so we can populate it below. */
2282 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2283 gimple
*stmt
= NULL
;
2285 if (cast_to_unsigned_p
)
2287 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2288 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2289 append_pattern_def_seq (stmt_vinfo
, stmt
);
2293 if (alg
.op
[0] == alg_zero
)
2294 accumulator
= build_int_cst (multtype
, 0);
2298 bool needs_fixup
= (variant
== negate_variant
)
2299 || (variant
== add_variant
);
2301 for (int i
= 1; i
< alg
.ops
; i
++)
2303 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2304 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2305 tree tmp_var
= NULL_TREE
;
2312 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2315 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2320 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2321 stmt_vinfo
, synth_shift_p
);
2322 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2326 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2327 shft_log
, stmt_vinfo
,
2329 /* In some algorithms the first step involves zeroing the
2330 accumulator. If subtracting from such an accumulator
2331 just emit the negation directly. */
2332 if (integer_zerop (accumulator
))
2333 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2335 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2340 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2341 stmt_vinfo
, synth_shift_p
);
2342 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2346 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2347 stmt_vinfo
, synth_shift_p
);
2348 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2350 case alg_add_factor
:
2352 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2353 stmt_vinfo
, synth_shift_p
);
2354 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2357 case alg_sub_factor
:
2359 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2360 stmt_vinfo
, synth_shift_p
);
2361 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2367 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2368 but rather return it directly. */
2370 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2371 append_pattern_def_seq (stmt_vinfo
, stmt
);
2372 accumulator
= accum_tmp
;
2374 if (variant
== negate_variant
)
2376 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2377 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2378 accumulator
= accum_tmp
;
2379 if (cast_to_unsigned_p
)
2380 append_pattern_def_seq (stmt_vinfo
, stmt
);
2382 else if (variant
== add_variant
)
2384 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2385 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2386 accumulator
= accum_tmp
;
2387 if (cast_to_unsigned_p
)
2388 append_pattern_def_seq (stmt_vinfo
, stmt
);
2390 /* Move back to a signed if needed. */
2391 if (cast_to_unsigned_p
)
2393 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2394 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
2400 /* Detect multiplication by constant and convert it into a sequence of
2401 shifts and additions, subtractions, negations. We reuse the
2402 choose_mult_variant algorithms from expmed.c
2406 STMTS: Contains a stmt from which the pattern search begins,
2411 * TYPE_OUT: The type of the output of this pattern.
2413 * Return value: A new stmt that will be used to replace
2414 the multiplication. */
2417 vect_recog_mult_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
2419 gimple
*last_stmt
= stmts
->pop ();
2420 tree oprnd0
, oprnd1
, vectype
, itype
;
2421 gimple
*pattern_stmt
;
2422 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2424 if (!is_gimple_assign (last_stmt
))
2427 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2430 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2431 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2432 itype
= TREE_TYPE (oprnd0
);
2434 if (TREE_CODE (oprnd0
) != SSA_NAME
2435 || TREE_CODE (oprnd1
) != INTEGER_CST
2436 || !INTEGRAL_TYPE_P (itype
)
2437 || !type_has_mode_precision_p (itype
))
2440 vectype
= get_vectype_for_scalar_type (itype
);
2441 if (vectype
== NULL_TREE
)
2444 /* If the target can handle vectorized multiplication natively,
2445 don't attempt to optimize this. */
2446 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2447 if (mul_optab
!= unknown_optab
)
2449 machine_mode vec_mode
= TYPE_MODE (vectype
);
2450 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
2451 if (icode
!= CODE_FOR_nothing
)
2455 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
);
2459 /* Pattern detected. */
2460 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
2462 stmts
->safe_push (last_stmt
);
2463 *type_out
= vectype
;
2465 return pattern_stmt
;
2468 /* Detect a signed division by a constant that wouldn't be
2469 otherwise vectorized:
2475 where type 'type' is an integral type and N is a constant.
2477 Similarly handle modulo by a constant:
2483 * STMTS: Contains a stmt from which the pattern search begins,
2484 i.e. the division stmt. S1 is replaced by if N is a power
2485 of two constant and type is signed:
2486 S3 y_t = b_t < 0 ? N - 1 : 0;
2488 S1' a_t = x_t >> log2 (N);
2490 S4 is replaced if N is a power of two constant and
2491 type is signed by (where *_T temporaries have unsigned type):
2492 S9 y_T = b_t < 0 ? -1U : 0U;
2493 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2494 S7 z_t = (type) z_T;
2496 S5 x_t = w_t & (N - 1);
2497 S4' a_t = x_t - z_t;
2501 * TYPE_OUT: The type of the output of this pattern.
2503 * Return value: A new stmt that will be used to replace the division
2504 S1 or modulo S4 stmt. */
2507 vect_recog_divmod_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
2509 gimple
*last_stmt
= stmts
->pop ();
2510 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2511 gimple
*pattern_stmt
, *def_stmt
;
2512 enum tree_code rhs_code
;
2513 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2514 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2517 int dummy_int
, prec
;
2518 stmt_vec_info def_stmt_vinfo
;
2520 if (!is_gimple_assign (last_stmt
))
2523 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2526 case TRUNC_DIV_EXPR
:
2527 case TRUNC_MOD_EXPR
:
2533 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2536 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2537 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2538 itype
= TREE_TYPE (oprnd0
);
2539 if (TREE_CODE (oprnd0
) != SSA_NAME
2540 || TREE_CODE (oprnd1
) != INTEGER_CST
2541 || TREE_CODE (itype
) != INTEGER_TYPE
2542 || !type_has_mode_precision_p (itype
))
2545 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2546 vectype
= get_vectype_for_scalar_type (itype
);
2547 if (vectype
== NULL_TREE
)
2550 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2552 /* If the target can handle vectorized division or modulo natively,
2553 don't attempt to optimize this, since native division is likely
2554 to give smaller code. */
2555 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2556 if (optab
!= unknown_optab
)
2558 machine_mode vec_mode
= TYPE_MODE (vectype
);
2559 int icode
= (int) optab_handler (optab
, vec_mode
);
2560 if (icode
!= CODE_FOR_nothing
)
2565 prec
= TYPE_PRECISION (itype
);
2566 if (integer_pow2p (oprnd1
))
2568 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2571 /* Pattern detected. */
2572 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2574 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2575 build_int_cst (itype
, 0));
2576 if (rhs_code
== TRUNC_DIV_EXPR
)
2578 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2581 = gimple_build_assign (var
, COND_EXPR
, cond
,
2582 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2583 build_int_cst (itype
, 1)),
2584 build_int_cst (itype
, 0));
2585 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2586 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2588 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2589 gimple_assign_lhs (def_stmt
));
2590 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2592 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2594 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2595 RSHIFT_EXPR
, var
, shift
);
2600 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2601 if (compare_tree_int (oprnd1
, 2) == 0)
2603 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2604 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2605 build_int_cst (itype
, 1),
2606 build_int_cst (itype
, 0));
2607 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2612 = build_nonstandard_integer_type (prec
, 1);
2613 tree vecutype
= get_vectype_for_scalar_type (utype
);
2615 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2616 - tree_log2 (oprnd1
));
2617 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2619 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2620 build_int_cst (utype
, -1),
2621 build_int_cst (utype
, 0));
2622 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2623 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2624 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2625 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2626 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2627 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2628 gimple_assign_lhs (def_stmt
),
2630 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2631 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2632 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2633 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2634 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2636 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2637 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2640 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2641 PLUS_EXPR
, oprnd0
, signmask
);
2642 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2644 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2645 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2646 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2647 build_int_cst (itype
, 1)));
2648 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2651 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2652 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2656 stmts
->safe_push (last_stmt
);
2658 *type_out
= vectype
;
2659 return pattern_stmt
;
2662 if (prec
> HOST_BITS_PER_WIDE_INT
2663 || integer_zerop (oprnd1
))
2666 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2669 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2671 if (TYPE_UNSIGNED (itype
))
2673 unsigned HOST_WIDE_INT mh
, ml
;
2674 int pre_shift
, post_shift
;
2675 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2676 & GET_MODE_MASK (itype_mode
));
2677 tree t1
, t2
, t3
, t4
;
2679 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2680 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2683 /* Find a suitable multiplier and right shift count
2684 instead of multiplying with D. */
2685 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2687 /* If the suggested multiplier is more than SIZE bits, we can do better
2688 for even divisors, using an initial right shift. */
2689 if (mh
!= 0 && (d
& 1) == 0)
2691 pre_shift
= ctz_or_zero (d
);
2692 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2693 &ml
, &post_shift
, &dummy_int
);
2701 if (post_shift
- 1 >= prec
)
2704 /* t1 = oprnd0 h* ml;
2708 q = t4 >> (post_shift - 1); */
2709 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2710 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2711 build_int_cst (itype
, ml
));
2712 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2714 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2716 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2717 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2719 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2721 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2722 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2724 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2726 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2728 if (post_shift
!= 1)
2730 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2732 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2734 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2735 build_int_cst (itype
, post_shift
- 1));
2740 pattern_stmt
= def_stmt
;
2745 if (pre_shift
>= prec
|| post_shift
>= prec
)
2748 /* t1 = oprnd0 >> pre_shift;
2750 q = t2 >> post_shift; */
2753 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2755 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2756 build_int_cst (NULL
, pre_shift
));
2757 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2762 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2763 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2764 build_int_cst (itype
, ml
));
2768 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2770 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2772 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2773 build_int_cst (itype
, post_shift
));
2778 pattern_stmt
= def_stmt
;
2783 unsigned HOST_WIDE_INT ml
;
2785 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2786 unsigned HOST_WIDE_INT abs_d
;
2788 tree t1
, t2
, t3
, t4
;
2790 /* Give up for -1. */
2794 /* Since d might be INT_MIN, we have to cast to
2795 unsigned HOST_WIDE_INT before negating to avoid
2796 undefined signed overflow. */
2798 ? (unsigned HOST_WIDE_INT
) d
2799 : - (unsigned HOST_WIDE_INT
) d
);
2801 /* n rem d = n rem -d */
2802 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2805 oprnd1
= build_int_cst (itype
, abs_d
);
2807 else if (HOST_BITS_PER_WIDE_INT
>= prec
2808 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2809 /* This case is not handled correctly below. */
2812 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2813 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2816 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2818 if (post_shift
>= prec
)
2821 /* t1 = oprnd0 h* ml; */
2822 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2823 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2824 build_int_cst (itype
, ml
));
2828 /* t2 = t1 + oprnd0; */
2829 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2830 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2831 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2838 /* t3 = t2 >> post_shift; */
2839 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2840 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2841 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2842 build_int_cst (itype
, post_shift
));
2847 wide_int oprnd0_min
, oprnd0_max
;
2849 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2851 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2853 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2857 if (msb
== 0 && d
>= 0)
2861 pattern_stmt
= def_stmt
;
2865 /* t4 = oprnd0 >> (prec - 1);
2866 or if we know from VRP that oprnd0 >= 0
2868 or if we know from VRP that oprnd0 < 0
2870 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2871 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2873 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2874 build_int_cst (itype
, msb
));
2876 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2877 build_int_cst (itype
, prec
- 1));
2878 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2880 /* q = t3 - t4; or q = t4 - t3; */
2881 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2882 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2887 if (rhs_code
== TRUNC_MOD_EXPR
)
2891 /* We divided. Now finish by:
2894 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2896 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2897 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2898 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2900 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2901 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2904 /* Pattern detected. */
2905 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2907 stmts
->safe_push (last_stmt
);
2909 *type_out
= vectype
;
2910 return pattern_stmt
;
2913 /* Function vect_recog_mixed_size_cond_pattern
2915 Try to find the following pattern:
2920 S1 a_T = x_t CMP y_t ? b_T : c_T;
2922 where type 'TYPE' is an integral type which has different size
2923 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2924 than 'type', the constants need to fit into an integer type
2925 with the same width as 'type') or results of conversion from 'type'.
2929 * LAST_STMT: A stmt from which the pattern search begins.
2933 * TYPE_OUT: The type of the output of this pattern.
2935 * Return value: A new stmt that will be used to replace the pattern.
2936 Additionally a def_stmt is added.
2938 a_it = x_t CMP y_t ? b_it : c_it;
2939 a_T = (TYPE) a_it; */
2942 vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
2944 gimple
*last_stmt
= (*stmts
)[0];
2945 tree cond_expr
, then_clause
, else_clause
;
2946 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2947 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2948 gimple
*pattern_stmt
, *def_stmt
;
2949 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2950 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2951 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
2953 tree comp_scalar_type
;
2955 if (!is_gimple_assign (last_stmt
)
2956 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2957 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2960 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2961 then_clause
= gimple_assign_rhs2 (last_stmt
);
2962 else_clause
= gimple_assign_rhs3 (last_stmt
);
2964 if (!COMPARISON_CLASS_P (cond_expr
))
2967 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2968 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2969 if (comp_vectype
== NULL_TREE
)
2972 type
= gimple_expr_type (last_stmt
);
2973 if (types_compatible_p (type
, comp_scalar_type
)
2974 || ((TREE_CODE (then_clause
) != INTEGER_CST
2975 || TREE_CODE (else_clause
) != INTEGER_CST
)
2976 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2977 || !INTEGRAL_TYPE_P (type
))
2980 if ((TREE_CODE (then_clause
) != INTEGER_CST
2981 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2982 &def_stmt0
, &promotion
))
2983 || (TREE_CODE (else_clause
) != INTEGER_CST
2984 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2985 &def_stmt1
, &promotion
)))
2988 if (orig_type0
&& orig_type1
2989 && !types_compatible_p (orig_type0
, orig_type1
))
2994 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2996 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3002 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3004 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3009 HOST_WIDE_INT cmp_mode_size
3010 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3012 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3013 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3016 vectype
= get_vectype_for_scalar_type (type
);
3017 if (vectype
== NULL_TREE
)
3020 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3023 if (itype
== NULL_TREE
)
3024 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3025 TYPE_UNSIGNED (type
));
3027 if (itype
== NULL_TREE
3028 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3031 vecitype
= get_vectype_for_scalar_type (itype
);
3032 if (vecitype
== NULL_TREE
)
3035 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3038 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3040 if ((TREE_CODE (then_clause
) == INTEGER_CST
3041 && !int_fits_type_p (then_clause
, itype
))
3042 || (TREE_CODE (else_clause
) == INTEGER_CST
3043 && !int_fits_type_p (else_clause
, itype
)))
3047 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3048 COND_EXPR
, unshare_expr (cond_expr
),
3049 fold_convert (itype
, then_clause
),
3050 fold_convert (itype
, else_clause
));
3051 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3052 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3054 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
3055 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
3056 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3057 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
3058 *type_out
= vectype
;
3060 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3062 return pattern_stmt
;
3066 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3067 true if bool VAR can and should be optimized that way. Assume it shouldn't
3068 in case it's a result of a comparison which can be directly vectorized into
3069 a vector comparison. Fills in STMTS with all stmts visited during the
3073 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3076 enum tree_code rhs_code
;
3078 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3082 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3086 if (stmts
.contains (def_stmt
))
3089 rhs1
= gimple_assign_rhs1 (def_stmt
);
3090 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3094 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3099 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3101 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3106 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3113 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3114 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3119 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3121 tree vecitype
, comp_vectype
;
3123 /* If the comparison can throw, then is_gimple_condexpr will be
3124 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3125 if (stmt_could_throw_p (def_stmt
))
3128 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3129 if (comp_vectype
== NULL_TREE
)
3132 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3134 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3137 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3139 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3141 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3142 vecitype
= get_vectype_for_scalar_type (itype
);
3143 if (vecitype
== NULL_TREE
)
3147 vecitype
= comp_vectype
;
3148 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3156 bool res
= stmts
.add (def_stmt
);
3157 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3164 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3165 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3166 pattern sequence. */
3169 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
3171 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3173 stmt_vec_info patt_vinfo
= new_stmt_vec_info (cast_stmt
, stmt_info
->vinfo
);
3174 set_vinfo_for_stmt (cast_stmt
, patt_vinfo
);
3175 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (type
);
3176 append_pattern_def_seq (stmt_info
, cast_stmt
);
3177 return gimple_assign_lhs (cast_stmt
);
3180 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3181 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3182 type, OUT_TYPE is the desired final integer type of the whole pattern.
3183 STMT_INFO is the info of the pattern root and is where pattern stmts should
3184 be associated with. DEFS is a map of pattern defs. */
3187 adjust_bool_pattern (tree var
, tree out_type
,
3188 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
3190 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3191 enum tree_code rhs_code
, def_rhs_code
;
3192 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3194 gimple
*pattern_stmt
, *def_stmt
;
3195 tree trueval
= NULL_TREE
;
3197 rhs1
= gimple_assign_rhs1 (stmt
);
3198 rhs2
= gimple_assign_rhs2 (stmt
);
3199 rhs_code
= gimple_assign_rhs_code (stmt
);
3200 loc
= gimple_location (stmt
);
3205 irhs1
= *defs
.get (rhs1
);
3206 itype
= TREE_TYPE (irhs1
);
3208 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3213 irhs1
= *defs
.get (rhs1
);
3214 itype
= TREE_TYPE (irhs1
);
3216 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3217 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3221 /* Try to optimize x = y & (a < b ? 1 : 0); into
3222 x = (a < b ? y : 0);
3228 S1 a_b = x1 CMP1 y1;
3229 S2 b_b = x2 CMP2 y2;
3231 S4 d_T = (TYPE) c_b;
3233 we would normally emit:
3235 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3236 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3237 S3' c_T = a_T & b_T;
3240 but we can save one stmt by using the
3241 result of one of the COND_EXPRs in the other COND_EXPR and leave
3242 BIT_AND_EXPR stmt out:
3244 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3245 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3248 At least when VEC_COND_EXPR is implemented using masks
3249 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3250 computes the comparison masks and ands it, in one case with
3251 all ones vector, in the other case with a vector register.
3252 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3253 often more expensive. */
3254 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3255 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3256 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3258 irhs1
= *defs
.get (rhs1
);
3259 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3260 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3261 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3263 rhs_code
= def_rhs_code
;
3265 rhs2
= gimple_assign_rhs2 (def_stmt
);
3270 irhs2
= *defs
.get (rhs2
);
3273 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3274 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3275 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3277 irhs2
= *defs
.get (rhs2
);
3278 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3279 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3280 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3282 rhs_code
= def_rhs_code
;
3284 rhs2
= gimple_assign_rhs2 (def_stmt
);
3289 irhs1
= *defs
.get (rhs1
);
3295 irhs1
= *defs
.get (rhs1
);
3296 irhs2
= *defs
.get (rhs2
);
3298 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3299 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3301 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3302 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3303 int out_prec
= TYPE_PRECISION (out_type
);
3304 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3305 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), irhs2
,
3307 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3308 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), irhs1
,
3312 irhs1
= adjust_bool_pattern_cast (out_type
, irhs1
, stmt_info
);
3313 irhs2
= adjust_bool_pattern_cast (out_type
, irhs2
, stmt_info
);
3316 itype
= TREE_TYPE (irhs1
);
3318 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3319 rhs_code
, irhs1
, irhs2
);
3324 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3325 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3326 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3327 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
3328 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3330 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3332 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3335 itype
= TREE_TYPE (rhs1
);
3336 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3337 if (trueval
== NULL_TREE
)
3338 trueval
= build_int_cst (itype
, 1);
3340 gcc_checking_assert (useless_type_conversion_p (itype
,
3341 TREE_TYPE (trueval
)));
3343 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3344 COND_EXPR
, cond_expr
, trueval
,
3345 build_int_cst (itype
, 0));
3349 gimple_set_location (pattern_stmt
, loc
);
3350 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3351 pattern def seq stmts instead of just letting auto-detection do
3353 stmt_vec_info patt_vinfo
= new_stmt_vec_info (pattern_stmt
, stmt_info
->vinfo
);
3354 set_vinfo_for_stmt (pattern_stmt
, patt_vinfo
);
3355 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (itype
);
3356 append_pattern_def_seq (stmt_info
, pattern_stmt
);
3357 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
3360 /* Comparison function to qsort a vector of gimple stmts after UID. */
3363 sort_after_uid (const void *p1
, const void *p2
)
3365 const gimple
*stmt1
= *(const gimple
* const *)p1
;
3366 const gimple
*stmt2
= *(const gimple
* const *)p2
;
3367 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
3370 /* Create pattern stmts for all stmts participating in the bool pattern
3371 specified by BOOL_STMT_SET and its root STMT with the desired type
3372 OUT_TYPE. Return the def of the pattern root. */
3375 adjust_bool_stmts (hash_set
<gimple
*> &bool_stmt_set
,
3376 tree out_type
, gimple
*stmt
)
3378 /* Gather original stmts in the bool pattern in their order of appearance
3380 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
3381 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
3382 i
!= bool_stmt_set
.end (); ++i
)
3383 bool_stmts
.quick_push (*i
);
3384 bool_stmts
.qsort (sort_after_uid
);
3386 /* Now process them in that order, producing pattern stmts. */
3387 hash_map
<tree
, tree
> defs
;
3388 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
3389 adjust_bool_pattern (gimple_assign_lhs (bool_stmts
[i
]),
3390 out_type
, vinfo_for_stmt (stmt
), defs
);
3392 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3393 gimple
*pattern_stmt
3394 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt
)));
3395 return gimple_assign_lhs (pattern_stmt
);
3398 /* Helper for search_type_for_mask. */
3401 search_type_for_mask_1 (tree var
, vec_info
*vinfo
,
3402 hash_map
<gimple
*, tree
> &cache
)
3405 enum tree_code rhs_code
;
3406 tree res
= NULL_TREE
, res2
;
3408 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3411 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3415 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3419 tree
*c
= cache
.get (def_stmt
);
3423 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3424 rhs1
= gimple_assign_rhs1 (def_stmt
);
3431 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3437 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3438 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
), vinfo
,
3440 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3445 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3447 tree comp_vectype
, mask_type
;
3449 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3451 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3452 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
),
3454 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3459 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3460 if (comp_vectype
== NULL_TREE
)
3466 mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3468 || !expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3474 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3475 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
3477 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3478 res
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3481 res
= TREE_TYPE (rhs1
);
3485 cache
.put (def_stmt
, res
);
3489 /* Return the proper type for converting bool VAR into
3490 an integer value or NULL_TREE if no such type exists.
3491 The type is chosen so that converted value has the
3492 same number of elements as VAR's vector type. */
3495 search_type_for_mask (tree var
, vec_info
*vinfo
)
3497 hash_map
<gimple
*, tree
> cache
;
3498 return search_type_for_mask_1 (var
, vinfo
, cache
);
3501 /* Function vect_recog_bool_pattern
3503 Try to find pattern like following:
3505 bool a_b, b_b, c_b, d_b, e_b;
3508 S1 a_b = x1 CMP1 y1;
3509 S2 b_b = x2 CMP2 y2;
3511 S4 d_b = x3 CMP3 y3;
3513 S6 f_T = (TYPE) e_b;
3515 where type 'TYPE' is an integral type. Or a similar pattern
3518 S6 f_Y = e_b ? r_Y : s_Y;
3520 as results from if-conversion of a complex condition.
3524 * LAST_STMT: A stmt at the end from which the pattern
3525 search begins, i.e. cast of a bool to
3530 * TYPE_OUT: The type of the output of this pattern.
3532 * Return value: A new stmt that will be used to replace the pattern.
3534 Assuming size of TYPE is the same as size of all comparisons
3535 (otherwise some casts would be added where needed), the above
3536 sequence we create related pattern stmts:
3537 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3538 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3539 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3540 S5' e_T = c_T | d_T;
3543 Instead of the above S3' we could emit:
3544 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3545 S3' c_T = a_T | b_T;
3546 but the above is more efficient. */
3549 vect_recog_bool_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
3551 gimple
*last_stmt
= stmts
->pop ();
3552 enum tree_code rhs_code
;
3553 tree var
, lhs
, rhs
, vectype
;
3554 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3555 stmt_vec_info new_stmt_info
;
3556 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3557 gimple
*pattern_stmt
;
3559 if (!is_gimple_assign (last_stmt
))
3562 var
= gimple_assign_rhs1 (last_stmt
);
3563 lhs
= gimple_assign_lhs (last_stmt
);
3565 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3568 hash_set
<gimple
*> bool_stmts
;
3570 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3571 if (CONVERT_EXPR_CODE_P (rhs_code
))
3573 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3574 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3576 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3577 if (vectype
== NULL_TREE
)
3580 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3582 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (lhs
), last_stmt
);
3583 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3584 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3585 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3588 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3592 tree type
= search_type_for_mask (var
, vinfo
);
3593 tree cst0
, cst1
, tmp
;
3598 /* We may directly use cond with narrowed type to avoid
3599 multiple cond exprs with following result packing and
3600 perform single cond with packed mask instead. In case
3601 of widening we better make cond first and then extract
3603 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
3604 type
= TREE_TYPE (lhs
);
3606 cst0
= build_int_cst (type
, 0);
3607 cst1
= build_int_cst (type
, 1);
3608 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
3609 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
3611 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
3613 tree new_vectype
= get_vectype_for_scalar_type (type
);
3614 new_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3615 set_vinfo_for_stmt (pattern_stmt
, new_stmt_info
);
3616 STMT_VINFO_VECTYPE (new_stmt_info
) = new_vectype
;
3617 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3619 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3620 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
3624 *type_out
= vectype
;
3625 stmts
->safe_push (last_stmt
);
3626 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3628 return pattern_stmt
;
3630 else if (rhs_code
== COND_EXPR
3631 && TREE_CODE (var
) == SSA_NAME
)
3633 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3634 if (vectype
== NULL_TREE
)
3637 /* Build a scalar type for the boolean result that when
3638 vectorized matches the vector type of the result in
3639 size and number of elements. */
3641 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
3642 TYPE_VECTOR_SUBPARTS (vectype
));
3645 = build_nonstandard_integer_type (prec
,
3646 TYPE_UNSIGNED (TREE_TYPE (var
)));
3647 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3650 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
3653 rhs
= adjust_bool_stmts (bool_stmts
, type
, last_stmt
);
3655 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3657 = gimple_build_assign (lhs
, COND_EXPR
,
3658 build2 (NE_EXPR
, boolean_type_node
,
3659 rhs
, build_int_cst (type
, 0)),
3660 gimple_assign_rhs2 (last_stmt
),
3661 gimple_assign_rhs3 (last_stmt
));
3662 *type_out
= vectype
;
3663 stmts
->safe_push (last_stmt
);
3664 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3666 return pattern_stmt
;
3668 else if (rhs_code
== SSA_NAME
3669 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3671 stmt_vec_info pattern_stmt_info
;
3672 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3673 gcc_assert (vectype
!= NULL_TREE
);
3674 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3677 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3678 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (vectype
), last_stmt
);
3681 tree type
= search_type_for_mask (var
, vinfo
);
3682 tree cst0
, cst1
, new_vectype
;
3687 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
3688 type
= TREE_TYPE (vectype
);
3690 cst0
= build_int_cst (type
, 0);
3691 cst1
= build_int_cst (type
, 1);
3692 new_vectype
= get_vectype_for_scalar_type (type
);
3694 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
3695 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
3697 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3698 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3699 STMT_VINFO_VECTYPE (pattern_stmt_info
) = new_vectype
;
3700 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3703 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3704 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3706 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3707 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3708 append_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3711 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3712 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3713 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3714 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3715 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3716 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3717 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3718 *type_out
= vectype
;
3719 stmts
->safe_push (last_stmt
);
3720 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3722 return pattern_stmt
;
3729 /* A helper for vect_recog_mask_conversion_pattern. Build
3730 conversion of MASK to a type suitable for masking VECTYPE.
3731 Built statement gets required vectype and is appended to
3732 a pattern sequence of STMT_VINFO.
3734 Return converted mask. */
3737 build_mask_conversion (tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
,
3742 stmt_vec_info new_stmt_info
;
3744 masktype
= build_same_sized_truth_vector_type (vectype
);
3745 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
3746 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
3747 new_stmt_info
= new_stmt_vec_info (stmt
, vinfo
);
3748 set_vinfo_for_stmt (stmt
, new_stmt_info
);
3749 STMT_VINFO_VECTYPE (new_stmt_info
) = masktype
;
3750 append_pattern_def_seq (stmt_vinfo
, stmt
);
3756 /* Function vect_recog_mask_conversion_pattern
3758 Try to find statements which require boolean type
3759 converison. Additional conversion statements are
3760 added to handle such cases. For example:
3770 S4 c_1 = m_3 ? c_2 : c_3;
3772 Will be transformed into:
3776 S3'' m_2' = (_Bool[bitsize=32])m_2
3777 S3' m_3' = m_1 & m_2';
3778 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3779 S4' c_1' = m_3'' ? c_2 : c_3; */
3782 vect_recog_mask_conversion_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
3784 gimple
*last_stmt
= stmts
->pop ();
3785 enum tree_code rhs_code
;
3786 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
3787 tree vectype1
, vectype2
;
3788 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3789 stmt_vec_info pattern_stmt_info
;
3790 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3792 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3793 if (is_gimple_call (last_stmt
)
3794 && gimple_call_internal_p (last_stmt
)
3795 && (gimple_call_internal_fn (last_stmt
) == IFN_MASK_STORE
3796 || gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
))
3798 gcall
*pattern_stmt
;
3799 bool load
= (gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
);
3803 lhs
= gimple_call_lhs (last_stmt
);
3804 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3808 rhs2
= gimple_call_arg (last_stmt
, 3);
3809 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (rhs2
));
3812 rhs1
= gimple_call_arg (last_stmt
, 2);
3813 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3816 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3818 if (!vectype1
|| !vectype2
3819 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3820 TYPE_VECTOR_SUBPARTS (vectype2
)))
3823 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
3827 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3829 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
3830 gimple_call_arg (last_stmt
, 0),
3831 gimple_call_arg (last_stmt
, 1),
3833 gimple_call_set_lhs (pattern_stmt
, lhs
);
3837 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
3838 gimple_call_arg (last_stmt
, 0),
3839 gimple_call_arg (last_stmt
, 1),
3841 gimple_call_arg (last_stmt
, 3));
3843 gimple_call_set_nothrow (pattern_stmt
, true);
3845 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3846 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3847 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3848 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3849 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3850 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3851 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
3852 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
);
3854 *type_out
= vectype1
;
3855 stmts
->safe_push (last_stmt
);
3856 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
3858 return pattern_stmt
;
3861 if (!is_gimple_assign (last_stmt
))
3864 gimple
*pattern_stmt
;
3865 lhs
= gimple_assign_lhs (last_stmt
);
3866 rhs1
= gimple_assign_rhs1 (last_stmt
);
3867 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3869 /* Check for cond expression requiring mask conversion. */
3870 if (rhs_code
== COND_EXPR
)
3872 /* vect_recog_mixed_size_cond_pattern could apply.
3874 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
3877 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3879 if (TREE_CODE (rhs1
) == SSA_NAME
)
3881 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3885 else if (COMPARISON_CLASS_P (rhs1
))
3887 /* Check whether we're comparing scalar booleans and (if so)
3888 whether a better mask type exists than the mask associated
3889 with boolean-sized elements. This avoids unnecessary packs
3890 and unpacks if the booleans are set from comparisons of
3891 wider types. E.g. in:
3893 int x1, x2, x3, x4, y1, y1;
3895 bool b1 = (x1 == x2);
3896 bool b2 = (x3 == x4);
3897 ... = b1 == b2 ? y1 : y2;
3899 it is better for b1 and b2 to use the mask type associated
3900 with int elements rather bool (byte) elements. */
3901 rhs1_type
= search_type_for_mask (TREE_OPERAND (rhs1
, 0), vinfo
);
3903 rhs1_type
= TREE_TYPE (TREE_OPERAND (rhs1
, 0));
3908 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3910 if (!vectype1
|| !vectype2
)
3913 /* Continue if a conversion is needed. Also continue if we have
3914 a comparison whose vector type would normally be different from
3915 VECTYPE2 when considered in isolation. In that case we'll
3916 replace the comparison with an SSA name (so that we can record
3917 its vector type) and behave as though the comparison was an SSA
3918 name from the outset. */
3919 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3920 TYPE_VECTOR_SUBPARTS (vectype2
))
3921 && (TREE_CODE (rhs1
) == SSA_NAME
3922 || rhs1_type
== TREE_TYPE (TREE_OPERAND (rhs1
, 0))))
3925 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
3926 in place, we can handle it in vectorizable_condition. This avoids
3927 unnecessary promotion stmts and increased vectorization factor. */
3928 if (COMPARISON_CLASS_P (rhs1
)
3929 && INTEGRAL_TYPE_P (rhs1_type
)
3930 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
3931 TYPE_VECTOR_SUBPARTS (vectype2
)))
3934 enum vect_def_type dt
;
3935 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), stmt_vinfo
->vinfo
,
3937 && dt
== vect_external_def
3938 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), stmt_vinfo
->vinfo
,
3940 && (dt
== vect_external_def
3941 || dt
== vect_constant_def
))
3943 tree wide_scalar_type
= build_nonstandard_integer_type
3944 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1
))),
3945 TYPE_UNSIGNED (rhs1_type
));
3946 tree vectype3
= get_vectype_for_scalar_type (wide_scalar_type
);
3947 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
3952 /* If rhs1 is a comparison we need to move it into a
3953 separate statement. */
3954 if (TREE_CODE (rhs1
) != SSA_NAME
)
3956 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
3957 pattern_stmt
= gimple_build_assign (tmp
, rhs1
);
3960 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3961 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3962 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype2
;
3963 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3966 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
3967 TYPE_VECTOR_SUBPARTS (vectype2
)))
3968 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
3972 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3973 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
3974 gimple_assign_rhs2 (last_stmt
),
3975 gimple_assign_rhs3 (last_stmt
));
3977 *type_out
= vectype1
;
3978 stmts
->safe_push (last_stmt
);
3979 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
3981 return pattern_stmt
;
3984 /* Now check for binary boolean operations requiring conversion for
3986 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
3989 if (rhs_code
!= BIT_IOR_EXPR
3990 && rhs_code
!= BIT_XOR_EXPR
3991 && rhs_code
!= BIT_AND_EXPR
3992 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
3995 rhs2
= gimple_assign_rhs2 (last_stmt
);
3997 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3998 rhs2_type
= search_type_for_mask (rhs2
, vinfo
);
4000 if (!rhs1_type
|| !rhs2_type
4001 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4004 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4006 vectype1
= get_mask_type_for_scalar_type (rhs1_type
);
4009 rhs2
= build_mask_conversion (rhs2
, vectype1
, stmt_vinfo
, vinfo
);
4013 vectype1
= get_mask_type_for_scalar_type (rhs2_type
);
4016 rhs1
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
4019 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4020 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4022 *type_out
= vectype1
;
4023 stmts
->safe_push (last_stmt
);
4024 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4026 return pattern_stmt
;
4029 /* STMT is a load or store. If the load or store is conditional, return
4030 the boolean condition under which it occurs, otherwise return null. */
4033 vect_get_load_store_mask (gimple
*stmt
)
4035 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt
))
4037 gcc_assert (gimple_assign_single_p (def_assign
));
4041 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt
))
4043 internal_fn ifn
= gimple_call_internal_fn (def_call
);
4044 int mask_index
= internal_fn_mask_index (ifn
);
4045 return gimple_call_arg (def_call
, mask_index
);
4051 /* Return the scalar offset type that an internal gather/scatter function
4052 should use. GS_INFO describes the gather/scatter operation. */
4055 vect_get_gather_scatter_offset_type (gather_scatter_info
*gs_info
)
4057 tree offset_type
= TREE_TYPE (gs_info
->offset
);
4058 unsigned int element_bits
= tree_to_uhwi (TYPE_SIZE (gs_info
->element_type
));
4060 /* Enforced by vect_check_gather_scatter. */
4061 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
4062 gcc_assert (element_bits
>= offset_bits
);
4064 /* If the offset is narrower than the elements, extend it according
4066 if (element_bits
> offset_bits
)
4067 return build_nonstandard_integer_type (element_bits
,
4068 TYPE_UNSIGNED (offset_type
));
4073 /* Return MASK if MASK is suitable for masking an operation on vectors
4074 of type VECTYPE, otherwise convert it into such a form and return
4075 the result. Associate any conversion statements with STMT_INFO's
4079 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
4080 stmt_vec_info stmt_info
, vec_info
*vinfo
)
4082 tree mask_type
= search_type_for_mask (mask
, vinfo
);
4085 tree mask_vectype
= get_mask_type_for_scalar_type (mask_type
);
4087 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
4088 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
4089 mask
= build_mask_conversion (mask
, vectype
, stmt_info
, vinfo
);
4094 /* Return the equivalent of:
4096 fold_convert (TYPE, VALUE)
4098 with the expectation that the operation will be vectorized.
4099 If new statements are needed, add them as pattern statements
4103 vect_add_conversion_to_patterm (tree type
, tree value
,
4104 stmt_vec_info stmt_info
,
4107 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
4110 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
4111 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
4112 stmt_vec_info new_stmt_info
= new_stmt_vec_info (conversion
, vinfo
);
4113 set_vinfo_for_stmt (conversion
, new_stmt_info
);
4114 STMT_VINFO_VECTYPE (new_stmt_info
) = get_vectype_for_scalar_type (type
);
4115 append_pattern_def_seq (stmt_info
, conversion
);
4119 /* Try to convert STMT into a call to a gather load or scatter store
4120 internal function. Return the final statement on success and set
4121 *TYPE_OUT to the vector type being loaded or stored.
4123 This function only handles gathers and scatters that were recognized
4124 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4127 vect_try_gather_scatter_pattern (gimple
*stmt
, stmt_vec_info last_stmt_info
,
4130 /* Currently we only support this for loop vectorization. */
4131 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4132 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (stmt_info
->vinfo
);
4136 /* Make sure that we're looking at a gather load or scatter store. */
4137 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4138 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4141 /* Get the boolean that controls whether the load or store happens.
4142 This is null if the operation is unconditional. */
4143 tree mask
= vect_get_load_store_mask (stmt
);
4145 /* Make sure that the target supports an appropriate internal
4146 function for the gather/scatter operation. */
4147 gather_scatter_info gs_info
;
4148 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
)
4152 /* Convert the mask to the right form. */
4153 tree gs_vectype
= get_vectype_for_scalar_type (gs_info
.element_type
);
4155 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, last_stmt_info
,
4158 /* Get the invariant base and non-invariant offset, converting the
4159 latter to the same width as the vector elements. */
4160 tree base
= gs_info
.base
;
4161 tree offset_type
= vect_get_gather_scatter_offset_type (&gs_info
);
4162 tree offset
= vect_add_conversion_to_patterm (offset_type
, gs_info
.offset
,
4163 last_stmt_info
, loop_vinfo
);
4165 /* Build the new pattern statement. */
4166 tree scale
= size_int (gs_info
.scale
);
4167 gcall
*pattern_stmt
;
4168 if (DR_IS_READ (dr
))
4171 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
4172 offset
, scale
, mask
);
4174 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 3, base
,
4176 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
4177 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
4181 tree rhs
= vect_get_store_rhs (stmt
);
4183 pattern_stmt
= gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
4184 base
, offset
, scale
, rhs
,
4187 pattern_stmt
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
4188 base
, offset
, scale
, rhs
);
4190 gimple_call_set_nothrow (pattern_stmt
, true);
4192 /* Copy across relevant vectorization info and associate DR with the
4193 new pattern statement instead of the original statement. */
4194 stmt_vec_info pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
,
4196 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4197 STMT_VINFO_DATA_REF (pattern_stmt_info
) = dr
;
4198 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
4199 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info
);
4200 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
4201 = STMT_VINFO_GATHER_SCATTER_P (stmt_info
);
4203 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4204 *type_out
= vectype
;
4205 vect_pattern_detected ("gather/scatter pattern", stmt
);
4207 return pattern_stmt
;
4210 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4213 vect_recog_gather_scatter_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
4215 gimple
*last_stmt
= stmts
->pop ();
4216 stmt_vec_info last_stmt_info
= vinfo_for_stmt (last_stmt
);
4217 gimple
*pattern_stmt
= vect_try_gather_scatter_pattern (last_stmt
,
4221 stmts
->safe_push (last_stmt
);
4222 return pattern_stmt
;
4225 typedef gimple
*(*vect_recog_func_ptr
) (vec
<gimple
*> *, tree
*);
4227 struct vect_recog_func
4229 vect_recog_func_ptr fn
;
4233 /* Note that ordering matters - the first pattern matching on a stmt is
4234 taken which means usually the more complex one needs to preceed the
4235 less comples onex (widen_sum only after dot_prod or sad for example). */
4236 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
4237 { vect_recog_widen_mult_pattern
, "widen_mult" },
4238 { vect_recog_dot_prod_pattern
, "dot_prod" },
4239 { vect_recog_sad_pattern
, "sad" },
4240 { vect_recog_widen_sum_pattern
, "widen_sum" },
4241 { vect_recog_pow_pattern
, "pow" },
4242 { vect_recog_widen_shift_pattern
, "widen_shift" },
4243 { vect_recog_over_widening_pattern
, "over_widening" },
4244 { vect_recog_rotate_pattern
, "rotate" },
4245 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
4246 { vect_recog_divmod_pattern
, "divmod" },
4247 { vect_recog_mult_pattern
, "mult" },
4248 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
4249 { vect_recog_bool_pattern
, "bool" },
4250 /* This must come before mask conversion, and includes the parts
4251 of mask conversion that are needed for gather and scatter
4252 internal functions. */
4253 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
4254 { vect_recog_mask_conversion_pattern
, "mask_conversion" }
4257 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
4259 /* Mark statements that are involved in a pattern. */
4262 vect_mark_pattern_stmts (gimple
*orig_stmt
, gimple
*pattern_stmt
,
4263 tree pattern_vectype
)
4265 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
4266 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
4267 vec_info
*vinfo
= orig_stmt_info
->vinfo
;
4270 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
4271 if (pattern_stmt_info
== NULL
)
4273 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
4274 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4276 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
4278 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
4279 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
4280 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
4281 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
4282 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
4283 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
4284 if (gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
))
4285 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
4286 !gsi_end_p (si
); gsi_next (&si
))
4288 def_stmt
= gsi_stmt (si
);
4289 def_stmt_info
= vinfo_for_stmt (def_stmt
);
4290 if (def_stmt_info
== NULL
)
4292 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
4293 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
4295 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
4296 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
4297 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
4298 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
4299 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
4303 /* Function vect_pattern_recog_1
4306 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4307 computation pattern.
4308 STMT: A stmt from which the pattern search should start.
4310 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4311 a sequence of statements that has the same functionality and can be
4312 used to replace STMT. It returns the last statement in the sequence
4313 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4314 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4315 statement, having first checked that the target supports the new operation
4318 This function also does some bookkeeping, as explained in the documentation
4319 for vect_recog_pattern. */
4322 vect_pattern_recog_1 (vect_recog_func
*recog_func
,
4323 gimple_stmt_iterator si
,
4324 vec
<gimple
*> *stmts_to_replace
)
4326 gimple
*stmt
= gsi_stmt (si
), *pattern_stmt
;
4327 stmt_vec_info stmt_info
;
4328 loop_vec_info loop_vinfo
;
4329 tree pattern_vectype
;
4332 stmts_to_replace
->truncate (0);
4333 stmts_to_replace
->quick_push (stmt
);
4334 pattern_stmt
= recog_func
->fn (stmts_to_replace
, &pattern_vectype
);
4337 /* Clear related stmt info that analysis might have noted for
4338 to be replaced stmts. */
4339 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4340 && (unsigned) i
< stmts_to_replace
->length ();
4343 stmt_info
= vinfo_for_stmt (stmt
);
4344 STMT_VINFO_RELATED_STMT (stmt_info
) = NULL
;
4349 stmt
= stmts_to_replace
->last ();
4350 stmt_info
= vinfo_for_stmt (stmt
);
4351 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4352 gcc_assert (pattern_vectype
);
4354 /* Found a vectorizable pattern. */
4355 if (dump_enabled_p ())
4357 dump_printf_loc (MSG_NOTE
, vect_location
,
4358 "%s pattern recognized: ", recog_func
->name
);
4359 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4362 /* Mark the stmts that are involved in the pattern. */
4363 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
4365 /* Patterns cannot be vectorized using SLP, because they change the order of
4371 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
4372 elem_ptr
, *elem_ptr
== stmt
);
4375 /* It is possible that additional pattern stmts are created and inserted in
4376 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4377 relevant statements. */
4378 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4379 && (unsigned) i
< (stmts_to_replace
->length () - 1);
4382 stmt_info
= vinfo_for_stmt (stmt
);
4383 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
4384 if (dump_enabled_p ())
4386 dump_printf_loc (MSG_NOTE
, vect_location
,
4387 "additional pattern stmt: ");
4388 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4391 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
4398 /* Function vect_pattern_recog
4401 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4404 Output - for each computation idiom that is detected we create a new stmt
4405 that provides the same functionality and that can be vectorized. We
4406 also record some information in the struct_stmt_info of the relevant
4407 stmts, as explained below:
4409 At the entry to this function we have the following stmts, with the
4410 following initial value in the STMT_VINFO fields:
4412 stmt in_pattern_p related_stmt vec_stmt
4413 S1: a_i = .... - - -
4414 S2: a_2 = ..use(a_i).. - - -
4415 S3: a_1 = ..use(a_2).. - - -
4416 S4: a_0 = ..use(a_1).. - - -
4417 S5: ... = ..use(a_0).. - - -
4419 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4420 represented by a single stmt. We then:
4421 - create a new stmt S6 equivalent to the pattern (the stmt is not
4422 inserted into the code)
4423 - fill in the STMT_VINFO fields as follows:
4425 in_pattern_p related_stmt vec_stmt
4426 S1: a_i = .... - - -
4427 S2: a_2 = ..use(a_i).. - - -
4428 S3: a_1 = ..use(a_2).. - - -
4429 S4: a_0 = ..use(a_1).. true S6 -
4430 '---> S6: a_new = .... - S4 -
4431 S5: ... = ..use(a_0).. - - -
4433 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4434 to each other through the RELATED_STMT field).
4436 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4437 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4438 remain irrelevant unless used by stmts other than S4.
4440 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4441 (because they are marked as irrelevant). It will vectorize S6, and record
4442 a pointer to the new vector stmt VS6 from S6 (as usual).
4443 S4 will be skipped, and S5 will be vectorized as usual:
4445 in_pattern_p related_stmt vec_stmt
4446 S1: a_i = .... - - -
4447 S2: a_2 = ..use(a_i).. - - -
4448 S3: a_1 = ..use(a_2).. - - -
4449 > VS6: va_new = .... - - -
4450 S4: a_0 = ..use(a_1).. true S6 VS6
4451 '---> S6: a_new = .... - S4 VS6
4452 > VS5: ... = ..vuse(va_new).. - - -
4453 S5: ... = ..use(a_0).. - - -
4455 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4456 elsewhere), and we'll end up with:
4459 VS5: ... = ..vuse(va_new)..
4461 In case of more than one pattern statements, e.g., widen-mult with
4465 S2 a_T = (TYPE) a_t;
4466 '--> S3: a_it = (interm_type) a_t;
4467 S4 prod_T = a_T * CONST;
4468 '--> S5: prod_T' = a_it w* CONST;
4470 there may be other users of a_T outside the pattern. In that case S2 will
4471 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4472 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4473 be recorded in S3. */
4476 vect_pattern_recog (vec_info
*vinfo
)
4481 gimple_stmt_iterator si
;
4483 auto_vec
<gimple
*, 1> stmts_to_replace
;
4485 DUMP_VECT_SCOPE ("vect_pattern_recog");
4487 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4489 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4490 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4491 nbbs
= loop
->num_nodes
;
4493 /* Scan through the loop stmts, applying the pattern recognition
4494 functions starting at each stmt visited: */
4495 for (i
= 0; i
< nbbs
; i
++)
4497 basic_block bb
= bbs
[i
];
4498 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
4500 gimple
*stmt
= gsi_stmt (si
);
4501 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4502 if (stmt_info
&& STMT_VINFO_IN_PATTERN_P (stmt_info
))
4504 /* Scan over all generic vect_recog_xxx_pattern functions. */
4505 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4506 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,
4514 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4515 for (si
= bb_vinfo
->region_begin
;
4516 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
4518 gimple
*stmt
= gsi_stmt (si
);
4519 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4521 && (!STMT_VINFO_VECTORIZABLE (stmt_info
)
4522 || STMT_VINFO_IN_PATTERN_P (stmt_info
)))
4525 /* Scan over all generic vect_recog_xxx_pattern functions. */
4526 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4527 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,