1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
44 static void expand_vector_operations_1 (gimple_stmt_iterator
*);
47 /* Build a constant of type TYPE, made of VALUE's bits replicated
48 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
50 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
52 int width
= tree_to_uhwi (TYPE_SIZE (inner_type
));
53 int n
= (TYPE_PRECISION (type
) + HOST_BITS_PER_WIDE_INT
- 1)
54 / HOST_BITS_PER_WIDE_INT
;
55 unsigned HOST_WIDE_INT low
, mask
;
56 HOST_WIDE_INT a
[WIDE_INT_MAX_ELTS
];
59 gcc_assert (n
&& n
<= WIDE_INT_MAX_ELTS
);
61 if (width
== HOST_BITS_PER_WIDE_INT
)
65 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
66 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
69 for (i
= 0; i
< n
; i
++)
72 gcc_assert (TYPE_PRECISION (type
) <= MAX_BITSIZE_MODE_ANY_INT
);
73 return wide_int_to_tree
74 (type
, wide_int::from_array (a
, n
, TYPE_PRECISION (type
)));
77 static GTY(()) tree vector_inner_type
;
78 static GTY(()) tree vector_last_type
;
79 static GTY(()) int vector_last_nunits
;
81 /* Return a suitable vector types made of SUBPARTS units each of mode
82 "word_mode" (the global variable). */
84 build_word_mode_vector_type (int nunits
)
86 if (!vector_inner_type
)
87 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
88 else if (vector_last_nunits
== nunits
)
90 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
91 return vector_last_type
;
94 /* We build a new type, but we canonicalize it nevertheless,
95 because it still saves some memory. */
96 vector_last_nunits
= nunits
;
97 vector_last_type
= type_hash_canon (nunits
,
98 build_vector_type (vector_inner_type
,
100 return vector_last_type
;
103 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
104 tree
, tree
, tree
, tree
, tree
, enum tree_code
,
108 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
109 tree t
, tree bitsize
, tree bitpos
)
111 if (TREE_CODE (t
) == SSA_NAME
)
113 gimple
*def_stmt
= SSA_NAME_DEF_STMT (t
);
114 if (is_gimple_assign (def_stmt
)
115 && (gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
117 && gimple_assign_rhs_code (def_stmt
) == CONSTRUCTOR
)))
118 t
= gimple_assign_rhs1 (def_stmt
);
122 if (TREE_CODE (type
) == BOOLEAN_TYPE
)
125 = build_nonstandard_integer_type (tree_to_uhwi (bitsize
), 0);
126 tree field
= gimplify_build3 (gsi
, BIT_FIELD_REF
, itype
, t
,
128 return gimplify_build2 (gsi
, NE_EXPR
, type
, field
,
129 build_zero_cst (itype
));
132 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
135 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
139 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
140 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
141 enum tree_code code
, tree type ATTRIBUTE_UNUSED
)
143 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
144 return gimplify_build1 (gsi
, code
, inner_type
, a
);
148 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
149 tree bitpos
, tree bitsize
, enum tree_code code
,
150 tree type ATTRIBUTE_UNUSED
)
152 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
153 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
154 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
155 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
156 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
159 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
161 INNER_TYPE is the type of A and B elements
163 returned expression is of signed integer type with the
164 size equal to the size of INNER_TYPE. */
166 do_compare (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
167 tree bitpos
, tree bitsize
, enum tree_code code
, tree type
)
169 tree stype
= TREE_TYPE (type
);
170 tree cst_false
= build_zero_cst (stype
);
171 tree cst_true
= build_all_ones_cst (stype
);
174 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
175 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
177 cmp
= build2 (code
, boolean_type_node
, a
, b
);
178 return gimplify_build3 (gsi
, COND_EXPR
, stype
, cmp
, cst_true
, cst_false
);
181 /* Expand vector addition to scalars. This does bit twiddling
182 in order to increase parallelism:
184 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
187 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
188 (a ^ ~b) & 0x80808080
190 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
192 This optimization should be done only if 4 vector items or more
195 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
196 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
197 enum tree_code code
, tree type ATTRIBUTE_UNUSED
)
199 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
200 unsigned HOST_WIDE_INT max
;
201 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
203 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
204 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
205 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
207 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
208 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
210 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
211 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
212 if (code
== PLUS_EXPR
)
213 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
216 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
217 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
220 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
221 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
222 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
226 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
227 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
228 tree bitsize ATTRIBUTE_UNUSED
,
229 enum tree_code code ATTRIBUTE_UNUSED
,
230 tree type ATTRIBUTE_UNUSED
)
232 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
234 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
236 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
237 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
238 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
240 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
242 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
243 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
244 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
245 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
246 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
249 /* Expand a vector operation to scalars, by using many operations
250 whose type is the vector type's inner type. */
252 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
253 tree type
, tree inner_type
,
254 tree a
, tree b
, enum tree_code code
)
256 vec
<constructor_elt
, va_gc
> *v
;
257 tree part_width
= TYPE_SIZE (inner_type
);
258 tree index
= bitsize_int (0);
259 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
260 int delta
= tree_to_uhwi (part_width
)
261 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type
)));
263 location_t loc
= gimple_location (gsi_stmt (*gsi
));
265 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi
)), type
))
266 warning_at (loc
, OPT_Wvector_operation_performance
,
267 "vector operation will be expanded piecewise");
269 warning_at (loc
, OPT_Wvector_operation_performance
,
270 "vector operation will be expanded in parallel");
272 vec_alloc (v
, (nunits
+ delta
- 1) / delta
);
273 for (i
= 0; i
< nunits
;
274 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
))
276 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
, type
);
277 constructor_elt ce
= {NULL_TREE
, result
};
281 return build_constructor (type
, v
);
284 /* Expand a vector operation to scalars with the freedom to use
285 a scalar integer type, or to use a different size for the items
286 in the vector type. */
288 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
292 tree result
, compute_type
;
293 int n_words
= tree_to_uhwi (TYPE_SIZE_UNIT (type
)) / UNITS_PER_WORD
;
294 location_t loc
= gimple_location (gsi_stmt (*gsi
));
296 /* We have three strategies. If the type is already correct, just do
297 the operation an element at a time. Else, if the vector is wider than
298 one word, do it a word at a time; finally, if the vector is smaller
299 than one word, do it as a scalar. */
300 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
301 return expand_vector_piecewise (gsi
, f
,
302 type
, TREE_TYPE (type
),
304 else if (n_words
> 1)
306 tree word_type
= build_word_mode_vector_type (n_words
);
307 result
= expand_vector_piecewise (gsi
, f
,
308 word_type
, TREE_TYPE (word_type
),
310 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
315 /* Use a single scalar operation with a mode no wider than word_mode. */
317 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type
)), 0).require ();
318 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
319 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
, type
);
320 warning_at (loc
, OPT_Wvector_operation_performance
,
321 "vector operation will be expanded with a "
322 "single scalar operation");
328 /* Expand a vector operation to scalars; for integer types we can use
329 special bit twiddling tricks to do the sums a word at a time, using
330 function F_PARALLEL instead of F. These tricks are done only if
331 they can process at least four items, that is, only if the vector
332 holds at least four items and if a word can hold four items. */
334 expand_vector_addition (gimple_stmt_iterator
*gsi
,
335 elem_op_func f
, elem_op_func f_parallel
,
336 tree type
, tree a
, tree b
, enum tree_code code
)
338 int parts_per_word
= UNITS_PER_WORD
339 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
341 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
342 && parts_per_word
>= 4
343 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
344 return expand_vector_parallel (gsi
, f_parallel
,
347 return expand_vector_piecewise (gsi
, f
,
348 type
, TREE_TYPE (type
),
352 /* Try to expand vector comparison expression OP0 CODE OP1 by
353 querying optab if the following expression:
354 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
357 expand_vector_comparison (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
358 tree op1
, enum tree_code code
)
361 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0
), type
, code
)
362 && !expand_vec_cond_expr_p (type
, TREE_TYPE (op0
), code
))
363 t
= expand_vector_piecewise (gsi
, do_compare
, type
,
364 TREE_TYPE (TREE_TYPE (op0
)), op0
, op1
, code
);
371 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
372 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
373 the result if successful, otherwise return NULL_TREE. */
375 add_rshift (gimple_stmt_iterator
*gsi
, tree type
, tree op0
, int *shiftcnts
)
378 unsigned int i
, nunits
= TYPE_VECTOR_SUBPARTS (type
);
379 bool scalar_shift
= true;
381 for (i
= 1; i
< nunits
; i
++)
383 if (shiftcnts
[i
] != shiftcnts
[0])
384 scalar_shift
= false;
387 if (scalar_shift
&& shiftcnts
[0] == 0)
392 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_scalar
);
393 if (op
!= unknown_optab
394 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
395 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
396 build_int_cst (NULL_TREE
, shiftcnts
[0]));
399 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
400 if (op
!= unknown_optab
401 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
403 tree_vector_builder
vec (type
, nunits
, 1);
404 for (i
= 0; i
< nunits
; i
++)
405 vec
.quick_push (build_int_cst (TREE_TYPE (type
), shiftcnts
[i
]));
406 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
, vec
.build ());
412 /* Try to expand integer vector division by constant using
413 widening multiply, shifts and additions. */
415 expand_vector_divmod (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
416 tree op1
, enum tree_code code
)
418 bool use_pow2
= true;
419 bool has_vector_shift
= true;
420 int mode
= -1, this_mode
;
421 int pre_shift
= -1, post_shift
;
422 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (type
);
423 int *shifts
= XALLOCAVEC (int, nunits
* 4);
424 int *pre_shifts
= shifts
+ nunits
;
425 int *post_shifts
= pre_shifts
+ nunits
;
426 int *shift_temps
= post_shifts
+ nunits
;
427 unsigned HOST_WIDE_INT
*mulc
= XALLOCAVEC (unsigned HOST_WIDE_INT
, nunits
);
428 int prec
= TYPE_PRECISION (TREE_TYPE (type
));
431 signop sign_p
= TYPE_SIGN (TREE_TYPE (type
));
432 unsigned HOST_WIDE_INT mask
= GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type
)));
433 tree cur_op
, mulcst
, tem
;
436 if (prec
> HOST_BITS_PER_WIDE_INT
)
439 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
440 if (op
== unknown_optab
441 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
442 has_vector_shift
= false;
444 /* Analysis phase. Determine if all op1 elements are either power
445 of two and it is possible to expand it using shifts (or for remainder
446 using masking). Additionally compute the multiplicative constants
447 and pre and post shifts if the division is to be expanded using
448 widening or high part multiplication plus shifts. */
449 for (i
= 0; i
< nunits
; i
++)
451 tree cst
= VECTOR_CST_ELT (op1
, i
);
452 unsigned HOST_WIDE_INT ml
;
454 if (TREE_CODE (cst
) != INTEGER_CST
|| integer_zerop (cst
))
460 && (!integer_pow2p (cst
) || tree_int_cst_sgn (cst
) != 1))
464 shifts
[i
] = tree_log2 (cst
);
465 if (shifts
[i
] != shifts
[0]
466 && code
== TRUNC_DIV_EXPR
467 && !has_vector_shift
)
472 if (sign_p
== UNSIGNED
)
474 unsigned HOST_WIDE_INT mh
;
475 unsigned HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
) & mask
;
477 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
478 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
487 /* Find a suitable multiplier and right shift count
488 instead of multiplying with D. */
489 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
491 /* If the suggested multiplier is more than SIZE bits, we can
492 do better for even divisors, using an initial right shift. */
493 if ((mh
!= 0 && (d
& 1) == 0)
494 || (!has_vector_shift
&& pre_shift
!= -1))
496 if (has_vector_shift
)
497 pre_shift
= ctz_or_zero (d
);
498 else if (pre_shift
== -1)
501 for (j
= 0; j
< nunits
; j
++)
503 tree cst2
= VECTOR_CST_ELT (op1
, j
);
504 unsigned HOST_WIDE_INT d2
;
507 if (!tree_fits_uhwi_p (cst2
))
509 d2
= tree_to_uhwi (cst2
) & mask
;
512 this_pre_shift
= floor_log2 (d2
& -d2
);
513 if (pre_shift
== -1 || this_pre_shift
< pre_shift
)
514 pre_shift
= this_pre_shift
;
516 if (i
!= 0 && pre_shift
!= 0)
526 if ((d
>> pre_shift
) <= 1)
531 mh
= choose_multiplier (d
>> pre_shift
, prec
,
533 &ml
, &post_shift
, &dummy_int
);
535 pre_shifts
[i
] = pre_shift
;
545 HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
);
546 unsigned HOST_WIDE_INT abs_d
;
551 /* Since d might be INT_MIN, we have to cast to
552 unsigned HOST_WIDE_INT before negating to avoid
553 undefined signed overflow. */
555 ? (unsigned HOST_WIDE_INT
) d
556 : - (unsigned HOST_WIDE_INT
) d
);
558 /* n rem d = n rem -d */
559 if (code
== TRUNC_MOD_EXPR
&& d
< 0)
561 else if (abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
563 /* This case is not handled correctly below. */
573 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
,
574 &post_shift
, &dummy_int
);
575 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
577 this_mode
= 4 + (d
< 0);
578 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
581 this_mode
= 2 + (d
< 0);
584 post_shifts
[i
] = post_shift
;
585 if ((i
&& !has_vector_shift
&& post_shifts
[0] != post_shift
)
586 || post_shift
>= prec
587 || pre_shifts
[i
] >= prec
)
592 else if (mode
!= this_mode
)
598 tree addend
= NULL_TREE
;
599 if (sign_p
== SIGNED
)
603 /* Both division and remainder sequences need
604 op0 < 0 ? mask : 0 computed. It can be either computed as
605 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
606 if none of the shifts is 0, or as the conditional. */
607 for (i
= 0; i
< nunits
; i
++)
611 = build_vector_type (build_nonstandard_integer_type (prec
, 1),
613 if (i
== nunits
&& TYPE_MODE (uns_type
) == TYPE_MODE (type
))
615 for (i
= 0; i
< nunits
; i
++)
616 shift_temps
[i
] = prec
- 1;
617 cur_op
= add_rshift (gsi
, type
, op0
, shift_temps
);
618 if (cur_op
!= NULL_TREE
)
620 cur_op
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
622 for (i
= 0; i
< nunits
; i
++)
623 shift_temps
[i
] = prec
- shifts
[i
];
624 cur_op
= add_rshift (gsi
, uns_type
, cur_op
, shift_temps
);
625 if (cur_op
!= NULL_TREE
)
626 addend
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
630 if (addend
== NULL_TREE
631 && expand_vec_cond_expr_p (type
, type
, LT_EXPR
))
633 tree zero
, cst
, cond
, mask_type
;
636 mask_type
= build_same_sized_truth_vector_type (type
);
637 zero
= build_zero_cst (type
);
638 cond
= build2 (LT_EXPR
, mask_type
, op0
, zero
);
639 tree_vector_builder
vec (type
, nunits
, 1);
640 for (i
= 0; i
< nunits
; i
++)
641 vec
.quick_push (build_int_cst (TREE_TYPE (type
),
645 addend
= make_ssa_name (type
);
646 stmt
= gimple_build_assign (addend
, VEC_COND_EXPR
, cond
,
648 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
651 if (code
== TRUNC_DIV_EXPR
)
653 if (sign_p
== UNSIGNED
)
655 /* q = op0 >> shift; */
656 cur_op
= add_rshift (gsi
, type
, op0
, shifts
);
657 if (cur_op
!= NULL_TREE
)
660 else if (addend
!= NULL_TREE
)
662 /* t1 = op0 + addend;
664 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
665 if (op
!= unknown_optab
666 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
668 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
, addend
);
669 cur_op
= add_rshift (gsi
, type
, cur_op
, shifts
);
670 if (cur_op
!= NULL_TREE
)
678 tree_vector_builder
vec (type
, nunits
, 1);
679 for (i
= 0; i
< nunits
; i
++)
680 vec
.quick_push (build_int_cst (TREE_TYPE (type
),
684 op
= optab_for_tree_code (BIT_AND_EXPR
, type
, optab_default
);
685 if (op
!= unknown_optab
686 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
688 if (sign_p
== UNSIGNED
)
689 /* r = op0 & mask; */
690 return gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, op0
, mask
);
691 else if (addend
!= NULL_TREE
)
693 /* t1 = op0 + addend;
696 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
697 if (op
!= unknown_optab
698 && optab_handler (op
, TYPE_MODE (type
))
701 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
,
703 cur_op
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
,
705 op
= optab_for_tree_code (MINUS_EXPR
, type
,
707 if (op
!= unknown_optab
708 && optab_handler (op
, TYPE_MODE (type
))
710 return gimplify_build2 (gsi
, MINUS_EXPR
, type
,
718 if (mode
== -2 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
721 if (!can_mult_highpart_p (TYPE_MODE (type
), TYPE_UNSIGNED (type
)))
729 gcc_assert (sign_p
== UNSIGNED
);
730 /* t1 = oprnd0 >> pre_shift;
732 q = t2 >> post_shift; */
733 cur_op
= add_rshift (gsi
, type
, cur_op
, pre_shifts
);
734 if (cur_op
== NULL_TREE
)
738 gcc_assert (sign_p
== UNSIGNED
);
739 for (i
= 0; i
< nunits
; i
++)
749 gcc_assert (sign_p
== SIGNED
);
750 for (i
= 0; i
< nunits
; i
++)
751 shift_temps
[i
] = prec
- 1;
757 tree_vector_builder
vec (type
, nunits
, 1);
758 for (i
= 0; i
< nunits
; i
++)
759 vec
.quick_push (build_int_cst (TREE_TYPE (type
), mulc
[i
]));
760 mulcst
= vec
.build ();
762 cur_op
= gimplify_build2 (gsi
, MULT_HIGHPART_EXPR
, type
, cur_op
, mulcst
);
767 /* t1 = oprnd0 >> pre_shift;
769 q = t2 >> post_shift; */
770 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
773 /* t1 = oprnd0 h* ml;
777 q = t4 >> (post_shift - 1); */
778 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
779 if (op
== unknown_optab
780 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
782 tem
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, cur_op
);
783 tem
= add_rshift (gsi
, type
, tem
, shift_temps
);
784 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
785 if (op
== unknown_optab
786 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
788 tem
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, tem
);
789 cur_op
= add_rshift (gsi
, type
, tem
, post_shifts
);
790 if (cur_op
== NULL_TREE
)
797 /* t1 = oprnd0 h* ml;
798 t2 = t1; [ iff (mode & 2) != 0 ]
799 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
800 t3 = t2 >> post_shift;
801 t4 = oprnd0 >> (prec - 1);
802 q = t3 - t4; [ iff (mode & 1) == 0 ]
803 q = t4 - t3; [ iff (mode & 1) != 0 ] */
806 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
807 if (op
== unknown_optab
808 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
810 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, op0
);
812 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
813 if (cur_op
== NULL_TREE
)
815 tem
= add_rshift (gsi
, type
, op0
, shift_temps
);
816 if (tem
== NULL_TREE
)
818 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
819 if (op
== unknown_optab
820 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
823 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, cur_op
, tem
);
825 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, tem
, cur_op
);
831 if (code
== TRUNC_DIV_EXPR
)
834 /* We divided. Now finish by:
837 op
= optab_for_tree_code (MULT_EXPR
, type
, optab_default
);
838 if (op
== unknown_optab
839 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
841 tem
= gimplify_build2 (gsi
, MULT_EXPR
, type
, cur_op
, op1
);
842 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
843 if (op
== unknown_optab
844 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
846 return gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, tem
);
849 /* Expand a vector condition to scalars, by using many conditions
850 on the vector's elements. */
852 expand_vector_condition (gimple_stmt_iterator
*gsi
)
854 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
855 tree type
= gimple_expr_type (stmt
);
856 tree a
= gimple_assign_rhs1 (stmt
);
859 bool a_is_comparison
= false;
860 tree b
= gimple_assign_rhs2 (stmt
);
861 tree c
= gimple_assign_rhs3 (stmt
);
862 vec
<constructor_elt
, va_gc
> *v
;
864 tree inner_type
= TREE_TYPE (type
);
865 tree cond_type
= TREE_TYPE (TREE_TYPE (a
));
866 tree comp_inner_type
= cond_type
;
867 tree width
= TYPE_SIZE (inner_type
);
868 tree index
= bitsize_int (0);
869 tree comp_width
= width
;
870 tree comp_index
= index
;
871 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
873 location_t loc
= gimple_location (gsi_stmt (*gsi
));
875 if (!is_gimple_val (a
))
877 gcc_assert (COMPARISON_CLASS_P (a
));
878 a_is_comparison
= true;
879 a1
= TREE_OPERAND (a
, 0);
880 a2
= TREE_OPERAND (a
, 1);
881 comp_inner_type
= TREE_TYPE (TREE_TYPE (a1
));
882 comp_width
= TYPE_SIZE (comp_inner_type
);
885 if (expand_vec_cond_expr_p (type
, TREE_TYPE (a1
), TREE_CODE (a
)))
888 /* Handle vector boolean types with bitmasks. If there is a comparison
889 and we can expand the comparison into the vector boolean bitmask,
890 or otherwise if it is compatible with type, we can transform
891 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
894 tmp_7 = tmp_6 & vbfld_4;
896 tmp_9 = tmp_8 & vbfld_5;
897 vbfld_1 = tmp_7 | tmp_9;
898 Similarly for vbfld_10 instead of x_2 < y_3. */
899 if (VECTOR_BOOLEAN_TYPE_P (type
)
900 && SCALAR_INT_MODE_P (TYPE_MODE (type
))
901 && (GET_MODE_BITSIZE (TYPE_MODE (type
))
902 < (TYPE_VECTOR_SUBPARTS (type
)
903 * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type
)))))
905 ? useless_type_conversion_p (type
, TREE_TYPE (a
))
906 : expand_vec_cmp_expr_p (TREE_TYPE (a1
), type
, TREE_CODE (a
))))
909 a
= gimplify_build2 (gsi
, TREE_CODE (a
), type
, a1
, a2
);
910 a1
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, a
, b
);
911 a2
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, type
, a
);
912 a2
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, a2
, c
);
913 a
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, type
, a1
, a2
);
914 gimple_assign_set_rhs_from_tree (gsi
, a
);
915 update_stmt (gsi_stmt (*gsi
));
919 /* TODO: try and find a smaller vector type. */
921 warning_at (loc
, OPT_Wvector_operation_performance
,
922 "vector condition will be expanded piecewise");
924 vec_alloc (v
, nunits
);
925 for (i
= 0; i
< nunits
; i
++)
928 tree bb
= tree_vec_extract (gsi
, inner_type
, b
, width
, index
);
929 tree cc
= tree_vec_extract (gsi
, inner_type
, c
, width
, index
);
932 tree aa1
= tree_vec_extract (gsi
, comp_inner_type
, a1
,
933 comp_width
, comp_index
);
934 tree aa2
= tree_vec_extract (gsi
, comp_inner_type
, a2
,
935 comp_width
, comp_index
);
936 aa
= fold_build2 (TREE_CODE (a
), cond_type
, aa1
, aa2
);
939 aa
= tree_vec_extract (gsi
, cond_type
, a
, width
, index
);
940 result
= gimplify_build3 (gsi
, COND_EXPR
, inner_type
, aa
, bb
, cc
);
941 constructor_elt ce
= {NULL_TREE
, result
};
943 index
= int_const_binop (PLUS_EXPR
, index
, width
);
944 if (width
== comp_width
)
947 comp_index
= int_const_binop (PLUS_EXPR
, comp_index
, comp_width
);
950 constr
= build_constructor (type
, v
);
951 gimple_assign_set_rhs_from_tree (gsi
, constr
);
952 update_stmt (gsi_stmt (*gsi
));
956 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
957 gassign
*assign
, enum tree_code code
)
959 machine_mode compute_mode
= TYPE_MODE (compute_type
);
961 /* If the compute mode is not a vector mode (hence we are not decomposing
962 a BLKmode vector to smaller, hardware-supported vectors), we may want
963 to expand the operations in parallel. */
964 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
965 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
966 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
967 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
968 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
969 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
974 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
975 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
976 gimple_assign_rhs1 (assign
),
977 gimple_assign_rhs2 (assign
), code
);
981 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
982 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
983 gimple_assign_rhs1 (assign
),
990 return expand_vector_parallel (gsi
, do_binop
, type
,
991 gimple_assign_rhs1 (assign
),
992 gimple_assign_rhs2 (assign
), code
);
995 return expand_vector_parallel (gsi
, do_unop
, type
,
996 gimple_assign_rhs1 (assign
),
1011 case UNORDERED_EXPR
:
1013 tree rhs1
= gimple_assign_rhs1 (assign
);
1014 tree rhs2
= gimple_assign_rhs2 (assign
);
1016 return expand_vector_comparison (gsi
, type
, rhs1
, rhs2
, code
);
1019 case TRUNC_DIV_EXPR
:
1020 case TRUNC_MOD_EXPR
:
1022 tree rhs1
= gimple_assign_rhs1 (assign
);
1023 tree rhs2
= gimple_assign_rhs2 (assign
);
1027 || !VECTOR_INTEGER_TYPE_P (type
)
1028 || TREE_CODE (rhs2
) != VECTOR_CST
1029 || !VECTOR_MODE_P (TYPE_MODE (type
)))
1032 ret
= expand_vector_divmod (gsi
, type
, rhs1
, rhs2
, code
);
1033 if (ret
!= NULL_TREE
)
1042 if (TREE_CODE_CLASS (code
) == tcc_unary
)
1043 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
1044 gimple_assign_rhs1 (assign
),
1047 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
1048 gimple_assign_rhs1 (assign
),
1049 gimple_assign_rhs2 (assign
), code
);
1053 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1055 _9 = { b_7, b_7, b_7, b_7 };
1056 a_5 = _9 + { 0, 3, 6, 9 };
1057 because vector splat operation is usually more efficient
1058 than piecewise initialization of the vector. */
1061 optimize_vector_constructor (gimple_stmt_iterator
*gsi
)
1063 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1064 tree lhs
= gimple_assign_lhs (stmt
);
1065 tree rhs
= gimple_assign_rhs1 (stmt
);
1066 tree type
= TREE_TYPE (rhs
);
1067 unsigned int i
, j
, nelts
= TYPE_VECTOR_SUBPARTS (type
);
1068 bool all_same
= true;
1069 constructor_elt
*elt
;
1071 tree base
= NULL_TREE
;
1074 if (nelts
<= 2 || CONSTRUCTOR_NELTS (rhs
) != nelts
)
1076 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
1077 if (op
== unknown_optab
1078 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
1080 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
1081 if (TREE_CODE (elt
->value
) != SSA_NAME
1082 || TREE_CODE (TREE_TYPE (elt
->value
)) == VECTOR_TYPE
)
1086 tree this_base
= elt
->value
;
1087 if (this_base
!= CONSTRUCTOR_ELT (rhs
, 0)->value
)
1089 for (j
= 0; j
< nelts
+ 1; j
++)
1091 g
= SSA_NAME_DEF_STMT (this_base
);
1092 if (is_gimple_assign (g
)
1093 && gimple_assign_rhs_code (g
) == PLUS_EXPR
1094 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
1095 && TREE_CODE (gimple_assign_rhs1 (g
)) == SSA_NAME
1096 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
1097 this_base
= gimple_assign_rhs1 (g
);
1103 else if (this_base
!= base
)
1108 tree_vector_builder
cst (type
, nelts
, 1);
1109 for (i
= 0; i
< nelts
; i
++)
1111 tree this_base
= CONSTRUCTOR_ELT (rhs
, i
)->value
;
1112 tree elt
= build_zero_cst (TREE_TYPE (base
));
1113 while (this_base
!= base
)
1115 g
= SSA_NAME_DEF_STMT (this_base
);
1116 elt
= fold_binary (PLUS_EXPR
, TREE_TYPE (base
),
1117 elt
, gimple_assign_rhs2 (g
));
1118 if (elt
== NULL_TREE
1119 || TREE_CODE (elt
) != INTEGER_CST
1120 || TREE_OVERFLOW (elt
))
1122 this_base
= gimple_assign_rhs1 (g
);
1124 cst
.quick_push (elt
);
1126 for (i
= 0; i
< nelts
; i
++)
1127 CONSTRUCTOR_ELT (rhs
, i
)->value
= base
;
1128 g
= gimple_build_assign (make_ssa_name (type
), rhs
);
1129 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1130 g
= gimple_build_assign (lhs
, PLUS_EXPR
, gimple_assign_lhs (g
),
1132 gsi_replace (gsi
, g
, false);
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.

   OP is the optab the caller intends to use; a candidate vector mode is
   only accepted if OP has a handler for it, so the returned type is
   guaranteed to be directly usable for that operation.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  /* Pick the class of vector modes to scan from the scalar mode class
     of the component type.  */
  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  /* Scan all vector modes in the class; keep the one with the most
     elements whose component mode matches and that OP can handle.  */
  FOR_EACH_MODE_FROM (mode, mode)
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */

static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  /* Reduce an out-of-range index modulo the element count
	     (ELEMENTS is a power of two, so masking suffices).  */
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  /* A flat constructor: pick the element directly; elements
	     beyond the explicit initializers are zero.  */
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  /* Otherwise extract the element as a bit-field at a constant
	     position.  */
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  /* Variable index: spill the vector to an addressable temporary and
     index it as an array.  Reuse a cached temporary via PTMPVEC when
     one was already created for this vector.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* Look through an SSA mask to its defining VECTOR_CST so a constant
     permutation can be recognized.  */
  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
	{
	  /* Hardware supports this constant permutation: keep the
	     statement, just canonicalize the mask operand.  */
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && indices[0]
	  && indices[0] < elements)
	{
	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
	  if (!ok_p)
	    {
	      for (i = 1; i < elements; ++i)
		{
		  unsigned int expected = i + indices[0];
		  /* Indices into the second vector are all equivalent.  */
		  if (MIN (elements, (unsigned) indices[i])
		      != MIN (elements, expected))
		    break;
		}
	      ok_p = i == elements;
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    /* A variable mask the target can handle directly; leave the
       statement untouched.  */
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  /* Fall back: build a CONSTRUCTOR selecting each element one by one.  */
  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  /* Bit ELEMENTS of the index selects the second vector.  */
	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      /* COND holds the vector-select bit of the runtime index.  */
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  /* Reduce the index into the range of one vector.  */
	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
1420 /* If OP is a uniform vector return the element it is a splat from. */
1423 ssa_uniform_vector_p (tree op
)
1425 if (TREE_CODE (op
) == VECTOR_CST
1426 || TREE_CODE (op
) == VEC_DUPLICATE_EXPR
1427 || TREE_CODE (op
) == CONSTRUCTOR
)
1428 return uniform_vector_p (op
);
1429 if (TREE_CODE (op
) == SSA_NAME
)
1431 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
1432 if (gimple_assign_single_p (def_stmt
))
1433 return uniform_vector_p (gimple_assign_rhs1 (def_stmt
));
/* Return type in which CODE operation with optab OP can be
   computed.  This may be TYPE itself (do it natively), a narrower
   vector type (split into smaller hardware vectors), or the element
   type of TYPE (fall back to scalars).  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  /* MULT_HIGHPART_EXPR has no direct optab; query its expander
	     separately.  */
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}
1483 /* Helper function of expand_vector_operations_1. Return number of
1484 vector elements for vector types or 1 for other types. */
1487 count_type_subparts (tree type
)
1489 return VECTOR_TYPE_P (type
) ? TYPE_VECTOR_SUBPARTS (type
) : 1;
/* Callback for expand_vector_piecewise / expand_vector_parallel when
   lowering a vector COND_EXPR: build one scalar COND_EXPR for the
   element at BITPOS/BITSIZE.  A and B are the two value operands (each
   may itself be a vector, in which case the element is extracted); the
   condition is taken from rhs1 of the statement at *GSI and shared
   (unshared per element) across all pieces.  */

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}
/* Expand a vector COND_EXPR to scalars, piecewise.  */

static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN. */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  /* Ternary codes with dedicated lowering paths are dispatched first.  */
  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  /* A scalar COND_EXPR producing a BLKmode vector value.  */
  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type)))
	     != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.

   Returns TODO_cleanup_cfg when EH edge purging changed the CFG,
   otherwise 0.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
namespace {

/* Pass descriptor for the first vector-lowering pass ("veclower"),
   run before vectorization.  */

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only if the function has not already been vector-lowered.  */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}
namespace {

/* Pass descriptor for the second vector-lowering pass ("veclower2"),
   run after vectorization; also requests CFG cleanup.  */

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  /* This pass may be instantiated more than once in the pipeline.  */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}
1921 #include "gt-tree-vect-generic.h"