/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "diagnostic.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "insn-config.h"
#include "insn-codes.h"
static void expand_vector_operations_1 (gimple_stmt_iterator *);
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
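/* Illustrative example (added for exposition, not in the original sources):
   replicating VALUE 0x7f over 8-bit INNER_TYPE elements into a 32-bit TYPE
   yields 0x7f7f7f7f, since ~0u / 0xff * (0x7f & 0xff) == 0x01010101 * 0x7f.  */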
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
	  / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;
/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);
static tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}
static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}
static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}
/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements

   returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
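/* Illustration (added for exposition, not in the original sources): comparing
   16-bit elements with LT_EXPR produces a 16-bit signed scalar that is -1
   (all bits set) when A[BITPOS] < B[BITPOS] and 0 otherwise, matching the
   element-wise result of a hardware vector comparison.  */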
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
		(GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ b) & 0x80808080)

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ ~b) & 0x80808080)

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
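/* Why the masks work (illustration added for exposition, not in the original
   sources): with 8-bit elements packed in a 32-bit word, 0x7f7f7f7f clears
   every element's top bit, so an ordinary 32-bit add cannot carry across
   element boundaries; the true top bits obey XOR arithmetic and are patched
   back in by the final ^ ... & 0x80808080 step.  */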
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      vec_safe_push (v, ce);
    }

  return build_constructor (type, v);
}
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  machine_mode mode;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
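  /* Illustration (added for exposition, not in the original sources): on a
     64-bit target a 16-byte V16QI vector takes the middle branch and is
     processed as two word-sized chunks, while a 2-byte V2QI vector falls
     through to the last branch and becomes a single HImode scalar
     operation.  */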
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
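/* For example (added for exposition, not in the original sources): a V8QI
   vector on a 32-bit target has parts_per_word == 4 and eight subparts, so
   additions go through F_PARALLEL a word at a time; a V2SI vector has
   parts_per_word == 1 and is expanded element by element instead.  */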
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}
/* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, vec));
    }

  return NULL_TREE;
}
/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine whether all op1 elements are powers of two
     and whether it is possible to expand the operation using shifts (or,
     for remainder, using masking).  Additionally compute the multiplicative
     constants and pre and post shifts if the division is to be expanded
     using widening or high part multiplication plus shifts.  */
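  /* Worked example (added for exposition, not in the original sources): for
     unsigned 32-bit division by 3, choose_multiplier typically yields the
     multiplier 0xAAAAAAAB with post_shift 1, so each element is computed as
     q = (n h* 0xAAAAAAAB) >> 1, where h* denotes MULT_HIGHPART_EXPR.  */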
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = floor_log2 (d & -d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if ((d >> pre_shift) <= 1)
		{
		  mode = -2;
		  continue;
		}
	      mh = choose_multiplier (d >> pre_shift, prec,
				      prec - pre_shift,
				      &ml, &post_shift, &dummy_int);
	      pre_shifts[i] = pre_shift;
	    }
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;
      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  tree uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type))
	    {
	      tree zero, cst, cond;
	      gimple stmt;

	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					((unsigned HOST_WIDE_INT) 1
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    ((unsigned HOST_WIDE_INT) 1
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
      return NULL_TREE;
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */

static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
	  aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      vec_safe_push (v, ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      tree rhs1 = gimple_assign_rhs1 (assign);
      tree rhs2 = gimple_assign_rhs2 (assign);

      return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
    }

  if (code == TRUNC_DIV_EXPR || code == TRUNC_MOD_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (assign);
      tree rhs2 = gimple_assign_rhs2 (assign);
      tree ret;

      if (!optimize
	  || !VECTOR_INTEGER_TYPE_P (type)
	  || TREE_CODE (rhs2) != VECTOR_CST
	  || !VECTOR_MODE_P (TYPE_MODE (type)))
	return NULL_TREE;

      ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
      if (ret != NULL_TREE)
	return ret;
    }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */
static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  tree *cst;
  gimple g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (base == NULL_TREE)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  cst = XALLOCAVEC (tree, nelts);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      cst[i] = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
				cst[i], gimple_assign_rhs2 (g));
	  if (cst[i] == NULL_TREE
	      || TREE_CODE (cst[i]) != INTEGER_CST
	      || TREE_OVERFLOW (cst[i]))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   build_vector (type, cst));
  gsi_replace (gsi, g, false);
}
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */
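/* Illustration (added for exposition, target-dependent and not in the
   original sources): with TYPE float on a target that provides both V4SF
   and V8SF and implements OP for V8SFmode, the loop below settles on
   V8SFmode and a vector type is built for it.  */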
static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, or a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
	 simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */
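/* Example (added for exposition, not in the original sources): with
   v0 = {10, 11, 12, 13}, v1 = {20, 21, 22, 23} and mask = {0, 5, 2, 7},
   element i selects from v1 whenever mask[i] >= 4, with the index taken
   modulo 4, so the lowered constructor is {10, 21, 12, 23}.  */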
static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}
/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}
static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, cond, a, b);
}
/* Expand a vector COND_EXPR to scalars, piecewise.  */

static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
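      /* E.g. (illustration added for exposition, not in the original
	 sources): for v >> {3, 3, 3, 3}, the uniform element 3 is pulled
	 out by uniform_vector_p and the statement is rewritten as v >> 3,
	 so the scalar-shift optab can be used.  */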
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;
	  gimple def_stmt;

	  if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
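	  /* Conceptually (illustration added for exposition, not in the
	     original sources), x rol n on an element of width w is
	     (x << n) | (x >> (w - n)) for 0 < n < w, which is why only the
	     two shift optabs and BIT_IOR_EXPR need to be available here.  */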
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ??? If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}
const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"