/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimplify.h"
#include "tree-cfg.h"
#include "tree-vector-builder.h"

static void expand_vector_operations_1 (gimple_stmt_iterator *);

/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
	  / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT) 1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}

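/* A worked example of the helper above (illustrative sketch only,
   assuming a 64-bit HOST_WIDE_INT): replicating VALUE 0x7f over
   QImode elements of a 32-bit TYPE computes

     width = 8
     mask  = (1 << 8) - 1                    == 0xff
     low   = (~0ULL / 0xff) * (0x7f & 0xff)  == 0x7f7f7f7f7f7f7f7f

   and truncating to TYPE_PRECISION (type) == 32 yields 0x7f7f7f7f,
   the per-element "all bits below the sign" mask used by the callers
   below.  */
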
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code,
			      tree);

static tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (TREE_CODE (t) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (t);
      if (is_gimple_assign (def_stmt)
	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
	      || (bitpos
		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
	t = gimple_assign_rhs1 (def_stmt);
    }
  if (bitpos)
    {
      if (TREE_CODE (type) == BOOLEAN_TYPE)
	{
	  tree itype
	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
					bitsize, bitpos);
	  return gimplify_build2 (gsi, NE_EXPR, type, field,
				  build_zero_cst (itype));
	}
      else
	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
    }
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code,
	  tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements

   returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code, tree type)
{
  tree stype = TREE_TYPE (type);
  tree cst_false = build_zero_cst (stype);
  tree cst_true = build_all_ones_cst (stype);
  tree cmp;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  cmp = build2 (code, boolean_type_node, a, b);
  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ b) & 0x80808080)

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ ~b) & 0x80808080)

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

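/* A numeric sketch of the bit twiddling above (illustrative only):
   adding the byte lanes 0xff and 0x01 inside one word, with
   low_bits == 0x7f7f7f7f and high_bits == 0x80808080:

     a_low      = 0xff & 0x7f          = 0x7f   (per lane)
     b_low      = 0x01 & 0x7f          = 0x01
     result_low = 0x7f + 0x01          = 0x80   (carry stays in the lane)
     signs      = (0xff ^ 0x01) & 0x80 = 0x80
     result     = 0x80 ^ 0x80          = 0x00   == (0xff + 0x01) mod 256

   Clearing each lane's top bit before the word-wide addition keeps
   carries from leaking into the neighbouring lane; the true top bit
   is then patched back in with the XOR.  */
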
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED,
	   tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      scalar_int_mode mode
	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

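/* For instance (illustrative only): for V8QI addition on a 64-bit
   target, parts_per_word is 8 and the vector holds 8 elements, so the
   word-at-a-time F_PARALLEL path is taken; for V2DI the elements are
   word-sized, parts_per_word is 1, and the addition is done piecewise
   with F.  */
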
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree_vector_builder vec (type, nunits, 1);
      for (i = 0; i < nunits; i++)
	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = ctz_or_zero (d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
	    {
	      tree zero, cst, cond, mask_type;
	      gimple *stmt;

	      mask_type = build_same_sized_truth_vector_type (type);
	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, mask_type, op0, zero);
	      tree_vector_builder vec (type, nunits, 1);
	      for (i = 0; i < nunits; i++)
		vec.quick_push (build_int_cst (TREE_TYPE (type),
					       (HOST_WIDE_INT_1U
						<< shifts[i]) - 1));
	      cst = vec.build ();
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  tree_vector_builder vec (type, nunits, 1);
	  for (i = 0; i < nunits; i++)
	    vec.quick_push (build_int_cst (TREE_TYPE (type),
					   (HOST_WIDE_INT_1U
					    << shifts[i]) - 1));
	  mask = vec.build ();
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
      return NULL_TREE;
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  tree_vector_builder vec (type, nunits, 1);
  for (i = 0; i < nunits; i++)
    vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
  mulcst = vec.build ();

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

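/* A classic instance of the transform above (illustrative only):
   unsigned 32-bit division by the constant 3 has no power-of-two
   shortcut, so the analysis picks the multiplier 0xaaaaaaab
   (== (2^33 + 1) / 3, with mh == 0 and post_shift == 1) and the
   emitted sequence is

     t1 = op0 h* 0xaaaaaaab;   high part of the 32x32->64 product
     q  = t1 >> 1;

   which computes op0 / 3 exactly for every 32-bit op0.  */
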
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
      comp_width = TYPE_SIZE (comp_inner_type);
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
    return;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && (GET_MODE_BITSIZE (TYPE_MODE (type))
	  < (TYPE_VECTOR_SUBPARTS (type)
	     * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type)))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return;
    }

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);

      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (base == NULL_TREE)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
			     elt, gimple_assign_rhs2 (g));
	  if (elt == NULL_TREE
	      || TREE_CODE (elt) != INTEGER_CST
	      || TREE_OVERFLOW (elt))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
      cst.quick_push (elt);
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   cst.build ());
  gsi_replace (gsi, g, false);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  FOR_EACH_MODE_FROM (mode, mode)
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}

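/* For instance (illustrative only): asked for SImode elements and the
   add optab on a target whose widest supported integer vector is
   16 bytes, the loop above settles on the V4SI mode and returns the
   matching vector type; with no vector support at all it returns
   NULL_TREE and the caller falls back to scalars.  */
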
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
	 simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      auto_vec_perm_indices sel_int (elements);

      for (i = 0; i < elements; ++i)
	sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
			    & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && sel_int[0]
	  && sel_int[0] < elements)
	{
	  bool ok_p = true;

	  for (i = 1; i < elements; ++i)
	    {
	      unsigned int expected = i + sel_int[0];
	      /* Indices into the second vector are all equivalent.  */
	      if (MIN (elements, (unsigned) sel_int[i])
		  != MIN (elements, expected))
		{
		  ok_p = false;
		  break;
		}
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

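/* A small example of the piecewise lowering above (illustrative only):
   VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,4,1,5}> selects from the
   concatenation of both inputs, so the built CONSTRUCTOR is
   {a, e, b, f}; mask elements are first reduced modulo 2*elements,
   and an index bit >= elements switches the lookup from VEC0 to
   VEC1.  */
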
/* If OP is a uniform vector return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == VEC_DUPLICATE_EXPR
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  if (TREE_CODE (op) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}

/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

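/* For instance (illustrative only): lowering a PLUS_EXPR on a 32-byte
   vector type when the target optab only supports 16-byte vectors,
   the function above returns the 16-byte vector type, so the caller
   splits the operation in two; if the optab supports no vector mode
   at all, it returns the element type and the operation is fully
   scalarized.  */
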
/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.  */

static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN. */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"