1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "langhooks.h"
33 #include "internal-fn.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
38 #include "tree-iterator.h"
39 #include "tree-pass.h"
41 #include "diagnostic.h"
44 #include "insn-codes.h"
45 #include "optabs-tree.h"
48 static void expand_vector_operations_1 (gimple_stmt_iterator
*);
51 /* Build a constant of type TYPE, made of VALUE's bits replicated
52 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
54 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
56 int width
= tree_to_uhwi (TYPE_SIZE (inner_type
));
57 int n
= (TYPE_PRECISION (type
) + HOST_BITS_PER_WIDE_INT
- 1)
58 / HOST_BITS_PER_WIDE_INT
;
59 unsigned HOST_WIDE_INT low
, mask
;
60 HOST_WIDE_INT a
[WIDE_INT_MAX_ELTS
];
63 gcc_assert (n
&& n
<= WIDE_INT_MAX_ELTS
);
65 if (width
== HOST_BITS_PER_WIDE_INT
)
69 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
70 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
73 for (i
= 0; i
< n
; i
++)
76 gcc_assert (TYPE_PRECISION (type
) <= MAX_BITSIZE_MODE_ANY_INT
);
77 return wide_int_to_tree
78 (type
, wide_int::from_array (a
, n
, TYPE_PRECISION (type
)));
81 static GTY(()) tree vector_inner_type
;
82 static GTY(()) tree vector_last_type
;
83 static GTY(()) int vector_last_nunits
;
85 /* Return a suitable vector types made of SUBPARTS units each of mode
86 "word_mode" (the global variable). */
88 build_word_mode_vector_type (int nunits
)
90 if (!vector_inner_type
)
91 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
92 else if (vector_last_nunits
== nunits
)
94 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
95 return vector_last_type
;
98 /* We build a new type, but we canonicalize it nevertheless,
99 because it still saves some memory. */
100 vector_last_nunits
= nunits
;
101 vector_last_type
= type_hash_canon (nunits
,
102 build_vector_type (vector_inner_type
,
104 return vector_last_type
;
107 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
108 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
111 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
112 tree t
, tree bitsize
, tree bitpos
)
115 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
117 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
121 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
122 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
125 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
126 return gimplify_build1 (gsi
, code
, inner_type
, a
);
130 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
131 tree bitpos
, tree bitsize
, enum tree_code code
)
133 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
134 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
135 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
136 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
137 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
140 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
142 INNER_TYPE is the type of A and B elements
144 returned expression is of signed integer type with the
145 size equal to the size of INNER_TYPE. */
147 do_compare (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
148 tree bitpos
, tree bitsize
, enum tree_code code
)
152 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
153 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
155 comp_type
= build_nonstandard_integer_type
156 (GET_MODE_BITSIZE (TYPE_MODE (inner_type
)), 0);
158 return gimplify_build3 (gsi
, COND_EXPR
, comp_type
,
159 fold_build2 (code
, boolean_type_node
, a
, b
),
160 build_int_cst (comp_type
, -1),
161 build_int_cst (comp_type
, 0));
164 /* Expand vector addition to scalars. This does bit twiddling
165 in order to increase parallelism:
167 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
170 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
171 (a ^ ~b) & 0x80808080
173 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
175 This optimization should be done only if 4 vector items or more
178 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
179 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
182 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
183 unsigned HOST_WIDE_INT max
;
184 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
186 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
187 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
188 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
190 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
191 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
193 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
194 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
195 if (code
== PLUS_EXPR
)
196 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
199 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
200 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
203 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
204 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
205 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
209 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
210 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
211 tree bitsize ATTRIBUTE_UNUSED
,
212 enum tree_code code ATTRIBUTE_UNUSED
)
214 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
216 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
218 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
219 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
220 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
222 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
224 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
225 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
226 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
227 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
228 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
231 /* Expand a vector operation to scalars, by using many operations
232 whose type is the vector type's inner type. */
234 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
235 tree type
, tree inner_type
,
236 tree a
, tree b
, enum tree_code code
)
238 vec
<constructor_elt
, va_gc
> *v
;
239 tree part_width
= TYPE_SIZE (inner_type
);
240 tree index
= bitsize_int (0);
241 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
242 int delta
= tree_to_uhwi (part_width
)
243 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type
)));
245 location_t loc
= gimple_location (gsi_stmt (*gsi
));
247 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi
)), type
))
248 warning_at (loc
, OPT_Wvector_operation_performance
,
249 "vector operation will be expanded piecewise");
251 warning_at (loc
, OPT_Wvector_operation_performance
,
252 "vector operation will be expanded in parallel");
254 vec_alloc (v
, (nunits
+ delta
- 1) / delta
);
255 for (i
= 0; i
< nunits
;
256 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
))
258 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
);
259 constructor_elt ce
= {NULL_TREE
, result
};
263 return build_constructor (type
, v
);
266 /* Expand a vector operation to scalars with the freedom to use
267 a scalar integer type, or to use a different size for the items
268 in the vector type. */
270 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
274 tree result
, compute_type
;
276 int n_words
= tree_to_uhwi (TYPE_SIZE_UNIT (type
)) / UNITS_PER_WORD
;
277 location_t loc
= gimple_location (gsi_stmt (*gsi
));
279 /* We have three strategies. If the type is already correct, just do
280 the operation an element at a time. Else, if the vector is wider than
281 one word, do it a word at a time; finally, if the vector is smaller
282 than one word, do it as a scalar. */
283 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
284 return expand_vector_piecewise (gsi
, f
,
285 type
, TREE_TYPE (type
),
287 else if (n_words
> 1)
289 tree word_type
= build_word_mode_vector_type (n_words
);
290 result
= expand_vector_piecewise (gsi
, f
,
291 word_type
, TREE_TYPE (word_type
),
293 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
298 /* Use a single scalar operation with a mode no wider than word_mode. */
299 mode
= mode_for_size (tree_to_uhwi (TYPE_SIZE (type
)), MODE_INT
, 0);
300 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
301 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
302 warning_at (loc
, OPT_Wvector_operation_performance
,
303 "vector operation will be expanded with a "
304 "single scalar operation");
310 /* Expand a vector operation to scalars; for integer types we can use
311 special bit twiddling tricks to do the sums a word at a time, using
312 function F_PARALLEL instead of F. These tricks are done only if
313 they can process at least four items, that is, only if the vector
314 holds at least four items and if a word can hold four items. */
316 expand_vector_addition (gimple_stmt_iterator
*gsi
,
317 elem_op_func f
, elem_op_func f_parallel
,
318 tree type
, tree a
, tree b
, enum tree_code code
)
320 int parts_per_word
= UNITS_PER_WORD
321 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
323 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
324 && parts_per_word
>= 4
325 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
326 return expand_vector_parallel (gsi
, f_parallel
,
329 return expand_vector_piecewise (gsi
, f
,
330 type
, TREE_TYPE (type
),
334 /* Try to expand vector comparison expression OP0 CODE OP1 by
335 querying optab if the following expression:
336 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
339 expand_vector_comparison (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
340 tree op1
, enum tree_code code
)
343 if (! expand_vec_cond_expr_p (type
, TREE_TYPE (op0
)))
344 t
= expand_vector_piecewise (gsi
, do_compare
, type
,
345 TREE_TYPE (TREE_TYPE (op0
)), op0
, op1
, code
);
352 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
353 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
354 the result if successful, otherwise return NULL_TREE. */
356 add_rshift (gimple_stmt_iterator
*gsi
, tree type
, tree op0
, int *shiftcnts
)
359 unsigned int i
, nunits
= TYPE_VECTOR_SUBPARTS (type
);
360 bool scalar_shift
= true;
362 for (i
= 1; i
< nunits
; i
++)
364 if (shiftcnts
[i
] != shiftcnts
[0])
365 scalar_shift
= false;
368 if (scalar_shift
&& shiftcnts
[0] == 0)
373 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_scalar
);
374 if (op
!= unknown_optab
375 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
376 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
377 build_int_cst (NULL_TREE
, shiftcnts
[0]));
380 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
381 if (op
!= unknown_optab
382 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
384 tree
*vec
= XALLOCAVEC (tree
, nunits
);
385 for (i
= 0; i
< nunits
; i
++)
386 vec
[i
] = build_int_cst (TREE_TYPE (type
), shiftcnts
[i
]);
387 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
388 build_vector (type
, vec
));
394 /* Try to expand integer vector division by constant using
395 widening multiply, shifts and additions. */
397 expand_vector_divmod (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
398 tree op1
, enum tree_code code
)
400 bool use_pow2
= true;
401 bool has_vector_shift
= true;
402 int mode
= -1, this_mode
;
403 int pre_shift
= -1, post_shift
;
404 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (type
);
405 int *shifts
= XALLOCAVEC (int, nunits
* 4);
406 int *pre_shifts
= shifts
+ nunits
;
407 int *post_shifts
= pre_shifts
+ nunits
;
408 int *shift_temps
= post_shifts
+ nunits
;
409 unsigned HOST_WIDE_INT
*mulc
= XALLOCAVEC (unsigned HOST_WIDE_INT
, nunits
);
410 int prec
= TYPE_PRECISION (TREE_TYPE (type
));
413 signop sign_p
= TYPE_SIGN (TREE_TYPE (type
));
414 unsigned HOST_WIDE_INT mask
= GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type
)));
416 tree cur_op
, mulcst
, tem
;
419 if (prec
> HOST_BITS_PER_WIDE_INT
)
422 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
423 if (op
== unknown_optab
424 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
425 has_vector_shift
= false;
427 /* Analysis phase. Determine if all op1 elements are either power
428 of two and it is possible to expand it using shifts (or for remainder
429 using masking). Additionally compute the multiplicative constants
430 and pre and post shifts if the division is to be expanded using
431 widening or high part multiplication plus shifts. */
432 for (i
= 0; i
< nunits
; i
++)
434 tree cst
= VECTOR_CST_ELT (op1
, i
);
435 unsigned HOST_WIDE_INT ml
;
437 if (TREE_CODE (cst
) != INTEGER_CST
|| integer_zerop (cst
))
443 && (!integer_pow2p (cst
) || tree_int_cst_sgn (cst
) != 1))
447 shifts
[i
] = tree_log2 (cst
);
448 if (shifts
[i
] != shifts
[0]
449 && code
== TRUNC_DIV_EXPR
450 && !has_vector_shift
)
455 if (sign_p
== UNSIGNED
)
457 unsigned HOST_WIDE_INT mh
;
458 unsigned HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
) & mask
;
460 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
461 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
470 /* Find a suitable multiplier and right shift count
471 instead of multiplying with D. */
472 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
474 /* If the suggested multiplier is more than SIZE bits, we can
475 do better for even divisors, using an initial right shift. */
476 if ((mh
!= 0 && (d
& 1) == 0)
477 || (!has_vector_shift
&& pre_shift
!= -1))
479 if (has_vector_shift
)
480 pre_shift
= floor_log2 (d
& -d
);
481 else if (pre_shift
== -1)
484 for (j
= 0; j
< nunits
; j
++)
486 tree cst2
= VECTOR_CST_ELT (op1
, j
);
487 unsigned HOST_WIDE_INT d2
;
490 if (!tree_fits_uhwi_p (cst2
))
492 d2
= tree_to_uhwi (cst2
) & mask
;
495 this_pre_shift
= floor_log2 (d2
& -d2
);
496 if (pre_shift
== -1 || this_pre_shift
< pre_shift
)
497 pre_shift
= this_pre_shift
;
499 if (i
!= 0 && pre_shift
!= 0)
509 if ((d
>> pre_shift
) <= 1)
514 mh
= choose_multiplier (d
>> pre_shift
, prec
,
516 &ml
, &post_shift
, &dummy_int
);
518 pre_shifts
[i
] = pre_shift
;
528 HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
);
529 unsigned HOST_WIDE_INT abs_d
;
534 /* Since d might be INT_MIN, we have to cast to
535 unsigned HOST_WIDE_INT before negating to avoid
536 undefined signed overflow. */
538 ? (unsigned HOST_WIDE_INT
) d
539 : - (unsigned HOST_WIDE_INT
) d
);
541 /* n rem d = n rem -d */
542 if (code
== TRUNC_MOD_EXPR
&& d
< 0)
544 else if (abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
546 /* This case is not handled correctly below. */
556 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
,
557 &post_shift
, &dummy_int
);
558 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
560 this_mode
= 4 + (d
< 0);
561 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
564 this_mode
= 2 + (d
< 0);
567 post_shifts
[i
] = post_shift
;
568 if ((i
&& !has_vector_shift
&& post_shifts
[0] != post_shift
)
569 || post_shift
>= prec
570 || pre_shifts
[i
] >= prec
)
575 else if (mode
!= this_mode
)
579 vec
= XALLOCAVEC (tree
, nunits
);
583 tree addend
= NULL_TREE
;
584 if (sign_p
== SIGNED
)
588 /* Both division and remainder sequences need
589 op0 < 0 ? mask : 0 computed. It can be either computed as
590 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
591 if none of the shifts is 0, or as the conditional. */
592 for (i
= 0; i
< nunits
; i
++)
596 = build_vector_type (build_nonstandard_integer_type (prec
, 1),
598 if (i
== nunits
&& TYPE_MODE (uns_type
) == TYPE_MODE (type
))
600 for (i
= 0; i
< nunits
; i
++)
601 shift_temps
[i
] = prec
- 1;
602 cur_op
= add_rshift (gsi
, type
, op0
, shift_temps
);
603 if (cur_op
!= NULL_TREE
)
605 cur_op
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
607 for (i
= 0; i
< nunits
; i
++)
608 shift_temps
[i
] = prec
- shifts
[i
];
609 cur_op
= add_rshift (gsi
, uns_type
, cur_op
, shift_temps
);
610 if (cur_op
!= NULL_TREE
)
611 addend
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
615 if (addend
== NULL_TREE
616 && expand_vec_cond_expr_p (type
, type
))
618 tree zero
, cst
, cond
;
621 zero
= build_zero_cst (type
);
622 cond
= build2 (LT_EXPR
, type
, op0
, zero
);
623 for (i
= 0; i
< nunits
; i
++)
624 vec
[i
] = build_int_cst (TREE_TYPE (type
),
625 ((unsigned HOST_WIDE_INT
) 1
627 cst
= build_vector (type
, vec
);
628 addend
= make_ssa_name (type
);
629 stmt
= gimple_build_assign (addend
, VEC_COND_EXPR
, cond
,
631 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
634 if (code
== TRUNC_DIV_EXPR
)
636 if (sign_p
== UNSIGNED
)
638 /* q = op0 >> shift; */
639 cur_op
= add_rshift (gsi
, type
, op0
, shifts
);
640 if (cur_op
!= NULL_TREE
)
643 else if (addend
!= NULL_TREE
)
645 /* t1 = op0 + addend;
647 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
648 if (op
!= unknown_optab
649 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
651 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
, addend
);
652 cur_op
= add_rshift (gsi
, type
, cur_op
, shifts
);
653 if (cur_op
!= NULL_TREE
)
661 for (i
= 0; i
< nunits
; i
++)
662 vec
[i
] = build_int_cst (TREE_TYPE (type
),
663 ((unsigned HOST_WIDE_INT
) 1
665 mask
= build_vector (type
, vec
);
666 op
= optab_for_tree_code (BIT_AND_EXPR
, type
, optab_default
);
667 if (op
!= unknown_optab
668 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
670 if (sign_p
== UNSIGNED
)
671 /* r = op0 & mask; */
672 return gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, op0
, mask
);
673 else if (addend
!= NULL_TREE
)
675 /* t1 = op0 + addend;
678 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
679 if (op
!= unknown_optab
680 && optab_handler (op
, TYPE_MODE (type
))
683 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
,
685 cur_op
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
,
687 op
= optab_for_tree_code (MINUS_EXPR
, type
,
689 if (op
!= unknown_optab
690 && optab_handler (op
, TYPE_MODE (type
))
692 return gimplify_build2 (gsi
, MINUS_EXPR
, type
,
700 if (mode
== -2 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
703 if (!can_mult_highpart_p (TYPE_MODE (type
), TYPE_UNSIGNED (type
)))
711 gcc_assert (sign_p
== UNSIGNED
);
712 /* t1 = oprnd0 >> pre_shift;
714 q = t2 >> post_shift; */
715 cur_op
= add_rshift (gsi
, type
, cur_op
, pre_shifts
);
716 if (cur_op
== NULL_TREE
)
720 gcc_assert (sign_p
== UNSIGNED
);
721 for (i
= 0; i
< nunits
; i
++)
731 gcc_assert (sign_p
== SIGNED
);
732 for (i
= 0; i
< nunits
; i
++)
733 shift_temps
[i
] = prec
- 1;
739 for (i
= 0; i
< nunits
; i
++)
740 vec
[i
] = build_int_cst (TREE_TYPE (type
), mulc
[i
]);
741 mulcst
= build_vector (type
, vec
);
743 cur_op
= gimplify_build2 (gsi
, MULT_HIGHPART_EXPR
, type
, cur_op
, mulcst
);
748 /* t1 = oprnd0 >> pre_shift;
750 q = t2 >> post_shift; */
751 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
754 /* t1 = oprnd0 h* ml;
758 q = t4 >> (post_shift - 1); */
759 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
760 if (op
== unknown_optab
761 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
763 tem
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, cur_op
);
764 tem
= add_rshift (gsi
, type
, tem
, shift_temps
);
765 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
766 if (op
== unknown_optab
767 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
769 tem
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, tem
);
770 cur_op
= add_rshift (gsi
, type
, tem
, post_shifts
);
771 if (cur_op
== NULL_TREE
)
778 /* t1 = oprnd0 h* ml;
779 t2 = t1; [ iff (mode & 2) != 0 ]
780 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
781 t3 = t2 >> post_shift;
782 t4 = oprnd0 >> (prec - 1);
783 q = t3 - t4; [ iff (mode & 1) == 0 ]
784 q = t4 - t3; [ iff (mode & 1) != 0 ] */
787 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
788 if (op
== unknown_optab
789 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
791 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, op0
);
793 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
794 if (cur_op
== NULL_TREE
)
796 tem
= add_rshift (gsi
, type
, op0
, shift_temps
);
797 if (tem
== NULL_TREE
)
799 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
800 if (op
== unknown_optab
801 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
804 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, cur_op
, tem
);
806 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, tem
, cur_op
);
812 if (code
== TRUNC_DIV_EXPR
)
815 /* We divided. Now finish by:
818 op
= optab_for_tree_code (MULT_EXPR
, type
, optab_default
);
819 if (op
== unknown_optab
820 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
822 tem
= gimplify_build2 (gsi
, MULT_EXPR
, type
, cur_op
, op1
);
823 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
824 if (op
== unknown_optab
825 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
827 return gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, tem
);
830 /* Expand a vector condition to scalars, by using many conditions
831 on the vector's elements. */
833 expand_vector_condition (gimple_stmt_iterator
*gsi
)
835 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
836 tree type
= gimple_expr_type (stmt
);
837 tree a
= gimple_assign_rhs1 (stmt
);
840 bool a_is_comparison
= false;
841 tree b
= gimple_assign_rhs2 (stmt
);
842 tree c
= gimple_assign_rhs3 (stmt
);
843 vec
<constructor_elt
, va_gc
> *v
;
845 tree inner_type
= TREE_TYPE (type
);
846 tree cond_type
= TREE_TYPE (TREE_TYPE (a
));
847 tree comp_inner_type
= cond_type
;
848 tree width
= TYPE_SIZE (inner_type
);
849 tree index
= bitsize_int (0);
850 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
852 location_t loc
= gimple_location (gsi_stmt (*gsi
));
854 if (!is_gimple_val (a
))
856 gcc_assert (COMPARISON_CLASS_P (a
));
857 a_is_comparison
= true;
858 a1
= TREE_OPERAND (a
, 0);
859 a2
= TREE_OPERAND (a
, 1);
860 comp_inner_type
= TREE_TYPE (TREE_TYPE (a1
));
863 if (expand_vec_cond_expr_p (type
, TREE_TYPE (a1
)))
866 /* TODO: try and find a smaller vector type. */
868 warning_at (loc
, OPT_Wvector_operation_performance
,
869 "vector condition will be expanded piecewise");
871 vec_alloc (v
, nunits
);
872 for (i
= 0; i
< nunits
;
873 i
++, index
= int_const_binop (PLUS_EXPR
, index
, width
))
876 tree bb
= tree_vec_extract (gsi
, inner_type
, b
, width
, index
);
877 tree cc
= tree_vec_extract (gsi
, inner_type
, c
, width
, index
);
880 tree aa1
= tree_vec_extract (gsi
, comp_inner_type
, a1
, width
, index
);
881 tree aa2
= tree_vec_extract (gsi
, comp_inner_type
, a2
, width
, index
);
882 aa
= build2 (TREE_CODE (a
), cond_type
, aa1
, aa2
);
885 aa
= tree_vec_extract (gsi
, cond_type
, a
, width
, index
);
886 result
= gimplify_build3 (gsi
, COND_EXPR
, inner_type
, aa
, bb
, cc
);
887 constructor_elt ce
= {NULL_TREE
, result
};
891 constr
= build_constructor (type
, v
);
892 gimple_assign_set_rhs_from_tree (gsi
, constr
);
893 update_stmt (gsi_stmt (*gsi
));
897 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
898 gassign
*assign
, enum tree_code code
)
900 machine_mode compute_mode
= TYPE_MODE (compute_type
);
902 /* If the compute mode is not a vector mode (hence we are not decomposing
903 a BLKmode vector to smaller, hardware-supported vectors), we may want
904 to expand the operations in parallel. */
905 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
906 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
907 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
908 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
909 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
910 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
915 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
916 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
917 gimple_assign_rhs1 (assign
),
918 gimple_assign_rhs2 (assign
), code
);
922 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
923 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
924 gimple_assign_rhs1 (assign
),
931 return expand_vector_parallel (gsi
, do_binop
, type
,
932 gimple_assign_rhs1 (assign
),
933 gimple_assign_rhs2 (assign
), code
);
936 return expand_vector_parallel (gsi
, do_unop
, type
,
937 gimple_assign_rhs1 (assign
),
954 tree rhs1
= gimple_assign_rhs1 (assign
);
955 tree rhs2
= gimple_assign_rhs2 (assign
);
957 return expand_vector_comparison (gsi
, type
, rhs1
, rhs2
, code
);
963 tree rhs1
= gimple_assign_rhs1 (assign
);
964 tree rhs2
= gimple_assign_rhs2 (assign
);
968 || !VECTOR_INTEGER_TYPE_P (type
)
969 || TREE_CODE (rhs2
) != VECTOR_CST
970 || !VECTOR_MODE_P (TYPE_MODE (type
)))
973 ret
= expand_vector_divmod (gsi
, type
, rhs1
, rhs2
, code
);
974 if (ret
!= NULL_TREE
)
983 if (TREE_CODE_CLASS (code
) == tcc_unary
)
984 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
985 gimple_assign_rhs1 (assign
),
988 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
989 gimple_assign_rhs1 (assign
),
990 gimple_assign_rhs2 (assign
), code
);
994 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
996 _9 = { b_7, b_7, b_7, b_7 };
997 a_5 = _9 + { 0, 3, 6, 9 };
998 because vector splat operation is usually more efficient
999 than piecewise initialization of the vector. */
1002 optimize_vector_constructor (gimple_stmt_iterator
*gsi
)
1004 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1005 tree lhs
= gimple_assign_lhs (stmt
);
1006 tree rhs
= gimple_assign_rhs1 (stmt
);
1007 tree type
= TREE_TYPE (rhs
);
1008 unsigned int i
, j
, nelts
= TYPE_VECTOR_SUBPARTS (type
);
1009 bool all_same
= true;
1010 constructor_elt
*elt
;
1013 tree base
= NULL_TREE
;
1016 if (nelts
<= 2 || CONSTRUCTOR_NELTS (rhs
) != nelts
)
1018 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
1019 if (op
== unknown_optab
1020 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
1022 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
1023 if (TREE_CODE (elt
->value
) != SSA_NAME
1024 || TREE_CODE (TREE_TYPE (elt
->value
)) == VECTOR_TYPE
)
1028 tree this_base
= elt
->value
;
1029 if (this_base
!= CONSTRUCTOR_ELT (rhs
, 0)->value
)
1031 for (j
= 0; j
< nelts
+ 1; j
++)
1033 g
= SSA_NAME_DEF_STMT (this_base
);
1034 if (is_gimple_assign (g
)
1035 && gimple_assign_rhs_code (g
) == PLUS_EXPR
1036 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
1037 && TREE_CODE (gimple_assign_rhs1 (g
)) == SSA_NAME
1038 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
1039 this_base
= gimple_assign_rhs1 (g
);
1045 else if (this_base
!= base
)
1050 cst
= XALLOCAVEC (tree
, nelts
);
1051 for (i
= 0; i
< nelts
; i
++)
1053 tree this_base
= CONSTRUCTOR_ELT (rhs
, i
)->value
;;
1054 cst
[i
] = build_zero_cst (TREE_TYPE (base
));
1055 while (this_base
!= base
)
1057 g
= SSA_NAME_DEF_STMT (this_base
);
1058 cst
[i
] = fold_binary (PLUS_EXPR
, TREE_TYPE (base
),
1059 cst
[i
], gimple_assign_rhs2 (g
));
1060 if (cst
[i
] == NULL_TREE
1061 || TREE_CODE (cst
[i
]) != INTEGER_CST
1062 || TREE_OVERFLOW (cst
[i
]))
1064 this_base
= gimple_assign_rhs1 (g
);
1067 for (i
= 0; i
< nelts
; i
++)
1068 CONSTRUCTOR_ELT (rhs
, i
)->value
= base
;
1069 g
= gimple_build_assign (make_ssa_name (type
), rhs
);
1070 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1071 g
= gimple_build_assign (lhs
, PLUS_EXPR
, gimple_assign_lhs (g
),
1072 build_vector (type
, cst
));
1073 gsi_replace (gsi
, g
, false);
1076 /* Return a type for the widest vector mode whose components are of type
1077 TYPE, or NULL_TREE if none is found. */
1080 type_for_widest_vector_mode (tree type
, optab op
)
1082 machine_mode inner_mode
= TYPE_MODE (type
);
1083 machine_mode best_mode
= VOIDmode
, mode
;
1084 int best_nunits
= 0;
1086 if (SCALAR_FLOAT_MODE_P (inner_mode
))
1087 mode
= MIN_MODE_VECTOR_FLOAT
;
1088 else if (SCALAR_FRACT_MODE_P (inner_mode
))
1089 mode
= MIN_MODE_VECTOR_FRACT
;
1090 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
1091 mode
= MIN_MODE_VECTOR_UFRACT
;
1092 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
1093 mode
= MIN_MODE_VECTOR_ACCUM
;
1094 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
1095 mode
= MIN_MODE_VECTOR_UACCUM
;
1097 mode
= MIN_MODE_VECTOR_INT
;
1099 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
1100 if (GET_MODE_INNER (mode
) == inner_mode
1101 && GET_MODE_NUNITS (mode
) > best_nunits
1102 && optab_handler (op
, mode
) != CODE_FOR_nothing
)
1103 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
1105 if (best_mode
== VOIDmode
)
1108 return build_vector_type_for_mode (type
, best_mode
);
1112 /* Build a reference to the element of the vector VECT. Function
1113 returns either the element itself, either BIT_FIELD_REF, or an
1114 ARRAY_REF expression.
1116 GSI is required to insert temporary variables while building a
1117 refernece to the element of the vector VECT.
1119 PTMPVEC is a pointer to the temporary variable for caching
1120 purposes. In case when PTMPVEC is NULL new temporary variable
1123 vector_element (gimple_stmt_iterator
*gsi
, tree vect
, tree idx
, tree
*ptmpvec
)
1125 tree vect_type
, vect_elt_type
;
1129 bool need_asgn
= true;
1130 unsigned int elements
;
1132 vect_type
= TREE_TYPE (vect
);
1133 vect_elt_type
= TREE_TYPE (vect_type
);
1134 elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1136 if (TREE_CODE (idx
) == INTEGER_CST
)
1138 unsigned HOST_WIDE_INT index
;
1140 /* Given that we're about to compute a binary modulus,
1141 we don't care about the high bits of the value. */
1142 index
= TREE_INT_CST_LOW (idx
);
1143 if (!tree_fits_uhwi_p (idx
) || index
>= elements
)
1145 index
&= elements
- 1;
1146 idx
= build_int_cst (TREE_TYPE (idx
), index
);
1149 /* When lowering a vector statement sequence do some easy
1150 simplification by looking through intermediate vector results. */
1151 if (TREE_CODE (vect
) == SSA_NAME
)
1153 gimple
*def_stmt
= SSA_NAME_DEF_STMT (vect
);
1154 if (is_gimple_assign (def_stmt
)
1155 && (gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
1156 || gimple_assign_rhs_code (def_stmt
) == CONSTRUCTOR
))
1157 vect
= gimple_assign_rhs1 (def_stmt
);
1160 if (TREE_CODE (vect
) == VECTOR_CST
)
1161 return VECTOR_CST_ELT (vect
, index
);
1162 else if (TREE_CODE (vect
) == CONSTRUCTOR
1163 && (CONSTRUCTOR_NELTS (vect
) == 0
1164 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect
, 0)->value
))
1167 if (index
< CONSTRUCTOR_NELTS (vect
))
1168 return CONSTRUCTOR_ELT (vect
, index
)->value
;
1169 return build_zero_cst (vect_elt_type
);
1173 tree size
= TYPE_SIZE (vect_elt_type
);
1174 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (index
),
1176 return fold_build3 (BIT_FIELD_REF
, vect_elt_type
, vect
, size
, pos
);
1181 tmpvec
= create_tmp_var (vect_type
, "vectmp");
1183 tmpvec
= *ptmpvec
= create_tmp_var (vect_type
, "vectmp");
1192 TREE_ADDRESSABLE (tmpvec
) = 1;
1193 asgn
= gimple_build_assign (tmpvec
, vect
);
1194 gsi_insert_before (gsi
, asgn
, GSI_SAME_STMT
);
1197 arraytype
= build_array_type_nelts (vect_elt_type
, elements
);
1198 return build4 (ARRAY_REF
, vect_elt_type
,
1199 build1 (VIEW_CONVERT_EXPR
, arraytype
, tmpvec
),
1200 idx
, NULL_TREE
, NULL_TREE
);
1203 /* Check if VEC_PERM_EXPR within the given setting is supported
1204 by hardware, or lower it piecewise.
1206 When VEC_PERM_EXPR has the same first and second operands:
1207 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1208 {v0[mask[0]], v0[mask[1]], ...}
1209 MASK and V0 must have the same number of elements.
1211 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1212 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1213 V0 and V1 must have the same type. MASK, V0, V1 must have the
1214 same number of arguments. */
1217 lower_vec_perm (gimple_stmt_iterator
*gsi
)
1219 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1220 tree mask
= gimple_assign_rhs3 (stmt
);
1221 tree vec0
= gimple_assign_rhs1 (stmt
);
1222 tree vec1
= gimple_assign_rhs2 (stmt
);
1223 tree vect_type
= TREE_TYPE (vec0
);
1224 tree mask_type
= TREE_TYPE (mask
);
1225 tree vect_elt_type
= TREE_TYPE (vect_type
);
1226 tree mask_elt_type
= TREE_TYPE (mask_type
);
1227 unsigned int elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1228 vec
<constructor_elt
, va_gc
> *v
;
1229 tree constr
, t
, si
, i_val
;
1230 tree vec0tmp
= NULL_TREE
, vec1tmp
= NULL_TREE
, masktmp
= NULL_TREE
;
1231 bool two_operand_p
= !operand_equal_p (vec0
, vec1
, 0);
1232 location_t loc
= gimple_location (gsi_stmt (*gsi
));
1235 if (TREE_CODE (mask
) == SSA_NAME
)
1237 gimple
*def_stmt
= SSA_NAME_DEF_STMT (mask
);
1238 if (is_gimple_assign (def_stmt
)
1239 && gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
)
1240 mask
= gimple_assign_rhs1 (def_stmt
);
1243 if (TREE_CODE (mask
) == VECTOR_CST
)
1245 unsigned char *sel_int
= XALLOCAVEC (unsigned char, elements
);
1247 for (i
= 0; i
< elements
; ++i
)
1248 sel_int
[i
] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask
, i
))
1249 & (2 * elements
- 1));
1251 if (can_vec_perm_p (TYPE_MODE (vect_type
), false, sel_int
))
1253 gimple_assign_set_rhs3 (stmt
, mask
);
1258 else if (can_vec_perm_p (TYPE_MODE (vect_type
), true, NULL
))
1261 warning_at (loc
, OPT_Wvector_operation_performance
,
1262 "vector shuffling operation will be expanded piecewise");
1264 vec_alloc (v
, elements
);
1265 for (i
= 0; i
< elements
; i
++)
1268 i_val
= vector_element (gsi
, mask
, si
, &masktmp
);
1270 if (TREE_CODE (i_val
) == INTEGER_CST
)
1272 unsigned HOST_WIDE_INT index
;
1274 index
= TREE_INT_CST_LOW (i_val
);
1275 if (!tree_fits_uhwi_p (i_val
) || index
>= elements
)
1276 i_val
= build_int_cst (mask_elt_type
, index
& (elements
- 1));
1278 if (two_operand_p
&& (index
& elements
) != 0)
1279 t
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1281 t
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1283 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
1284 true, GSI_SAME_STMT
);
1288 tree cond
= NULL_TREE
, v0_val
;
1292 cond
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1293 build_int_cst (mask_elt_type
, elements
));
1294 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1295 true, GSI_SAME_STMT
);
1298 i_val
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1299 build_int_cst (mask_elt_type
, elements
- 1));
1300 i_val
= force_gimple_operand_gsi (gsi
, i_val
, true, NULL_TREE
,
1301 true, GSI_SAME_STMT
);
1303 v0_val
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1304 v0_val
= force_gimple_operand_gsi (gsi
, v0_val
, true, NULL_TREE
,
1305 true, GSI_SAME_STMT
);
1311 v1_val
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1312 v1_val
= force_gimple_operand_gsi (gsi
, v1_val
, true, NULL_TREE
,
1313 true, GSI_SAME_STMT
);
1315 cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1316 cond
, build_zero_cst (mask_elt_type
));
1317 cond
= fold_build3 (COND_EXPR
, vect_elt_type
,
1318 cond
, v0_val
, v1_val
);
1319 t
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1320 true, GSI_SAME_STMT
);
1326 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, t
);
1329 constr
= build_constructor (vect_type
, v
);
1330 gimple_assign_set_rhs_from_tree (gsi
, constr
);
1331 update_stmt (gsi_stmt (*gsi
));
1334 /* Return type in which CODE operation with optab OP can be
1338 get_compute_type (enum tree_code code
, optab op
, tree type
)
1340 /* For very wide vectors, try using a smaller vector mode. */
1341 tree compute_type
= type
;
1343 && (!VECTOR_MODE_P (TYPE_MODE (type
))
1344 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
))
1346 tree vector_compute_type
1347 = type_for_widest_vector_mode (TREE_TYPE (type
), op
);
1348 if (vector_compute_type
!= NULL_TREE
1349 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
1350 < TYPE_VECTOR_SUBPARTS (compute_type
))
1351 && (optab_handler (op
, TYPE_MODE (vector_compute_type
))
1352 != CODE_FOR_nothing
))
1353 compute_type
= vector_compute_type
;
1356 /* If we are breaking a BLKmode vector into smaller pieces,
1357 type_for_widest_vector_mode has already looked into the optab,
1358 so skip these checks. */
1359 if (compute_type
== type
)
1361 machine_mode compute_mode
= TYPE_MODE (compute_type
);
1362 if (VECTOR_MODE_P (compute_mode
))
1364 if (op
&& optab_handler (op
, compute_mode
) != CODE_FOR_nothing
)
1365 return compute_type
;
1366 if (code
== MULT_HIGHPART_EXPR
1367 && can_mult_highpart_p (compute_mode
,
1368 TYPE_UNSIGNED (compute_type
)))
1369 return compute_type
;
1371 /* There is no operation in hardware, so fall back to scalars. */
1372 compute_type
= TREE_TYPE (type
);
1375 return compute_type
;
1378 /* Helper function of expand_vector_operations_1. Return number of
1379 vector elements for vector types or 1 for other types. */
1382 count_type_subparts (tree type
)
1384 return VECTOR_TYPE_P (type
) ? TYPE_VECTOR_SUBPARTS (type
) : 1;
1388 do_cond (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
1389 tree bitpos
, tree bitsize
, enum tree_code code
)
1391 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
1392 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
1393 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
1394 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
1395 tree cond
= gimple_assign_rhs1 (gsi_stmt (*gsi
));
1396 return gimplify_build3 (gsi
, code
, inner_type
, cond
, a
, b
);
1399 /* Expand a vector COND_EXPR to scalars, piecewise. */
1401 expand_vector_scalar_condition (gimple_stmt_iterator
*gsi
)
1403 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1404 tree type
= gimple_expr_type (stmt
);
1405 tree compute_type
= get_compute_type (COND_EXPR
, mov_optab
, type
);
1406 machine_mode compute_mode
= TYPE_MODE (compute_type
);
1407 gcc_assert (compute_mode
!= BLKmode
);
1408 tree lhs
= gimple_assign_lhs (stmt
);
1409 tree rhs2
= gimple_assign_rhs2 (stmt
);
1410 tree rhs3
= gimple_assign_rhs3 (stmt
);
1413 /* If the compute mode is not a vector mode (hence we are not decomposing
1414 a BLKmode vector to smaller, hardware-supported vectors), we may want
1415 to expand the operations in parallel. */
1416 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
1417 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
1418 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
1419 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
1420 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
1421 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
1422 new_rhs
= expand_vector_parallel (gsi
, do_cond
, type
, rhs2
, rhs3
,
1425 new_rhs
= expand_vector_piecewise (gsi
, do_cond
, type
, compute_type
,
1426 rhs2
, rhs3
, COND_EXPR
);
1427 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
1428 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
1431 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1432 way to do it is change expand_vector_operation and its callees to
1433 return a tree_code, RHS1 and RHS2 instead of a tree. */
1434 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
1435 update_stmt (gsi_stmt (*gsi
));
1438 /* Process one statement. If we identify a vector operation, expand it. */
1441 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
1443 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
= NULL_TREE
;
1444 enum tree_code code
;
1445 optab op
= unknown_optab
;
1446 enum gimple_rhs_class rhs_class
;
1449 /* Only consider code == GIMPLE_ASSIGN. */
1450 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (*gsi
));
1454 code
= gimple_assign_rhs_code (stmt
);
1455 rhs_class
= get_gimple_rhs_class (code
);
1456 lhs
= gimple_assign_lhs (stmt
);
1458 if (code
== VEC_PERM_EXPR
)
1460 lower_vec_perm (gsi
);
1464 if (code
== VEC_COND_EXPR
)
1466 expand_vector_condition (gsi
);
1470 if (code
== COND_EXPR
1471 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt
))) == VECTOR_TYPE
1472 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt
))) == BLKmode
)
1474 expand_vector_scalar_condition (gsi
);
1478 if (code
== CONSTRUCTOR
1479 && TREE_CODE (lhs
) == SSA_NAME
1480 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs
)))
1481 && !gimple_clobber_p (stmt
)
1484 optimize_vector_constructor (gsi
);
1488 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
1491 rhs1
= gimple_assign_rhs1 (stmt
);
1492 type
= gimple_expr_type (stmt
);
1493 if (rhs_class
== GIMPLE_BINARY_RHS
)
1494 rhs2
= gimple_assign_rhs2 (stmt
);
1496 if (TREE_CODE (type
) != VECTOR_TYPE
)
1499 if (CONVERT_EXPR_CODE_P (code
)
1500 || code
== FLOAT_EXPR
1501 || code
== FIX_TRUNC_EXPR
1502 || code
== VIEW_CONVERT_EXPR
)
1505 /* The signedness is determined from input argument. */
1506 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
1507 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
1508 type
= TREE_TYPE (rhs1
);
1510 /* For widening/narrowing vector operations, the relevant type is of the
1511 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1512 calculated in the same way above. */
1513 if (code
== WIDEN_SUM_EXPR
1514 || code
== VEC_WIDEN_MULT_HI_EXPR
1515 || code
== VEC_WIDEN_MULT_LO_EXPR
1516 || code
== VEC_WIDEN_MULT_EVEN_EXPR
1517 || code
== VEC_WIDEN_MULT_ODD_EXPR
1518 || code
== VEC_UNPACK_HI_EXPR
1519 || code
== VEC_UNPACK_LO_EXPR
1520 || code
== VEC_PACK_TRUNC_EXPR
1521 || code
== VEC_PACK_SAT_EXPR
1522 || code
== VEC_PACK_FIX_TRUNC_EXPR
1523 || code
== VEC_WIDEN_LSHIFT_HI_EXPR
1524 || code
== VEC_WIDEN_LSHIFT_LO_EXPR
)
1525 type
= TREE_TYPE (rhs1
);
1527 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1529 if (code
== LSHIFT_EXPR
1530 || code
== RSHIFT_EXPR
1531 || code
== LROTATE_EXPR
1532 || code
== RROTATE_EXPR
)
1536 /* Check whether we have vector <op> {x,x,x,x} where x
1537 could be a scalar variable or a constant. Transform
1538 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1539 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1544 if ((TREE_CODE (rhs2
) == VECTOR_CST
1545 && (first
= uniform_vector_p (rhs2
)) != NULL_TREE
)
1546 || (TREE_CODE (rhs2
) == SSA_NAME
1547 && (def_stmt
= SSA_NAME_DEF_STMT (rhs2
))
1548 && gimple_assign_single_p (def_stmt
)
1549 && (first
= uniform_vector_p
1550 (gimple_assign_rhs1 (def_stmt
))) != NULL_TREE
))
1552 gimple_assign_set_rhs2 (stmt
, first
);
1558 opv
= optab_for_tree_code (code
, type
, optab_vector
);
1559 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1563 op
= optab_for_tree_code (code
, type
, optab_scalar
);
1565 compute_type
= get_compute_type (code
, op
, type
);
1566 if (compute_type
== type
)
1568 /* The rtl expander will expand vector/scalar as vector/vector
1569 if necessary. Pick one with wider vector type. */
1570 tree compute_vtype
= get_compute_type (code
, opv
, type
);
1571 if (count_type_subparts (compute_vtype
)
1572 > count_type_subparts (compute_type
))
1574 compute_type
= compute_vtype
;
1579 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1581 if (compute_type
== NULL_TREE
)
1582 compute_type
= get_compute_type (code
, op
, type
);
1583 if (compute_type
== type
)
1585 /* Before splitting vector rotates into scalar rotates,
1586 see if we can't use vector shifts and BIT_IOR_EXPR
1587 instead. For vector by vector rotates we'd also
1588 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1589 for now, fold doesn't seem to create such rotates anyway. */
1590 if (compute_type
== TREE_TYPE (type
)
1591 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1593 optab oplv
= vashl_optab
, opl
= ashl_optab
;
1594 optab oprv
= vlshr_optab
, opr
= lshr_optab
, opo
= ior_optab
;
1595 tree compute_lvtype
= get_compute_type (LSHIFT_EXPR
, oplv
, type
);
1596 tree compute_rvtype
= get_compute_type (RSHIFT_EXPR
, oprv
, type
);
1597 tree compute_otype
= get_compute_type (BIT_IOR_EXPR
, opo
, type
);
1598 tree compute_ltype
= get_compute_type (LSHIFT_EXPR
, opl
, type
);
1599 tree compute_rtype
= get_compute_type (RSHIFT_EXPR
, opr
, type
);
1600 /* The rtl expander will expand vector/scalar as vector/vector
1601 if necessary. Pick one with wider vector type. */
1602 if (count_type_subparts (compute_lvtype
)
1603 > count_type_subparts (compute_ltype
))
1605 compute_ltype
= compute_lvtype
;
1608 if (count_type_subparts (compute_rvtype
)
1609 > count_type_subparts (compute_rtype
))
1611 compute_rtype
= compute_rvtype
;
1614 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1616 compute_type
= compute_ltype
;
1617 if (count_type_subparts (compute_type
)
1618 > count_type_subparts (compute_rtype
))
1619 compute_type
= compute_rtype
;
1620 if (count_type_subparts (compute_type
)
1621 > count_type_subparts (compute_otype
))
1622 compute_type
= compute_otype
;
1623 /* Verify all 3 operations can be performed in that type. */
1624 if (compute_type
!= TREE_TYPE (type
))
1626 if (optab_handler (opl
, TYPE_MODE (compute_type
))
1628 || optab_handler (opr
, TYPE_MODE (compute_type
))
1630 || optab_handler (opo
, TYPE_MODE (compute_type
))
1631 == CODE_FOR_nothing
)
1632 compute_type
= TREE_TYPE (type
);
1638 op
= optab_for_tree_code (code
, type
, optab_default
);
1640 /* Optabs will try converting a negation into a subtraction, so
1641 look for it as well. TODO: negation of floating-point vectors
1642 might be turned into an exclusive OR toggling the sign bit. */
1643 if (op
== unknown_optab
1644 && code
== NEGATE_EXPR
1645 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
1646 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
1648 if (compute_type
== NULL_TREE
)
1649 compute_type
= get_compute_type (code
, op
, type
);
1650 if (compute_type
== type
)
1653 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
1655 /* Leave expression untouched for later expansion. */
1656 if (new_rhs
== NULL_TREE
)
1659 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
1660 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
1663 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1664 way to do it is change expand_vector_operation and its callees to
1665 return a tree_code, RHS1 and RHS2 instead of a tree. */
1666 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
1667 update_stmt (gsi_stmt (*gsi
));
1670 /* Use this to lower vector operations introduced by the vectorizer,
1671 if it may need the bit-twiddling tricks implemented in this file. */
1674 expand_vector_operations (void)
1676 gimple_stmt_iterator gsi
;
1678 bool cfg_changed
= false;
1680 FOR_EACH_BB_FN (bb
, cfun
)
1682 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1684 expand_vector_operations_1 (&gsi
);
1685 /* ??? If we do not cleanup EH then we will ICE in
1686 verification. But in reality we have created wrong-code
1687 as we did not properly transition EH info and edges to
1688 the piecewise computations. */
1689 if (maybe_clean_eh_stmt (gsi_stmt (gsi
))
1690 && gimple_purge_dead_eh_edges (bb
))
1695 return cfg_changed
? TODO_cleanup_cfg
: 0;
1700 const pass_data pass_data_lower_vector
=
1702 GIMPLE_PASS
, /* type */
1703 "veclower", /* name */
1704 OPTGROUP_VEC
, /* optinfo_flags */
1705 TV_NONE
, /* tv_id */
1706 PROP_cfg
, /* properties_required */
1707 PROP_gimple_lvec
, /* properties_provided */
1708 0, /* properties_destroyed */
1709 0, /* todo_flags_start */
1710 TODO_update_ssa
, /* todo_flags_finish */
1713 class pass_lower_vector
: public gimple_opt_pass
1716 pass_lower_vector (gcc::context
*ctxt
)
1717 : gimple_opt_pass (pass_data_lower_vector
, ctxt
)
1720 /* opt_pass methods: */
1721 virtual bool gate (function
*fun
)
1723 return !(fun
->curr_properties
& PROP_gimple_lvec
);
1726 virtual unsigned int execute (function
*)
1728 return expand_vector_operations ();
1731 }; // class pass_lower_vector
1736 make_pass_lower_vector (gcc::context
*ctxt
)
1738 return new pass_lower_vector (ctxt
);
1743 const pass_data pass_data_lower_vector_ssa
=
1745 GIMPLE_PASS
, /* type */
1746 "veclower2", /* name */
1747 OPTGROUP_VEC
, /* optinfo_flags */
1748 TV_NONE
, /* tv_id */
1749 PROP_cfg
, /* properties_required */
1750 PROP_gimple_lvec
, /* properties_provided */
1751 0, /* properties_destroyed */
1752 0, /* todo_flags_start */
1754 | TODO_cleanup_cfg
), /* todo_flags_finish */
1757 class pass_lower_vector_ssa
: public gimple_opt_pass
1760 pass_lower_vector_ssa (gcc::context
*ctxt
)
1761 : gimple_opt_pass (pass_data_lower_vector_ssa
, ctxt
)
1764 /* opt_pass methods: */
1765 opt_pass
* clone () { return new pass_lower_vector_ssa (m_ctxt
); }
1766 virtual unsigned int execute (function
*)
1768 return expand_vector_operations ();
1771 }; // class pass_lower_vector_ssa
1776 make_pass_lower_vector_ssa (gcc::context
*ctxt
)
1778 return new pass_lower_vector_ssa (ctxt
);
1781 #include "gt-tree-vect-generic.h"