1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
42 static void expand_vector_operations_1 (gimple_stmt_iterator
*);
45 /* Build a constant of type TYPE, made of VALUE's bits replicated
46 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
48 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
50 int width
= tree_to_uhwi (TYPE_SIZE (inner_type
));
51 int n
= (TYPE_PRECISION (type
) + HOST_BITS_PER_WIDE_INT
- 1)
52 / HOST_BITS_PER_WIDE_INT
;
53 unsigned HOST_WIDE_INT low
, mask
;
54 HOST_WIDE_INT a
[WIDE_INT_MAX_ELTS
];
57 gcc_assert (n
&& n
<= WIDE_INT_MAX_ELTS
);
59 if (width
== HOST_BITS_PER_WIDE_INT
)
63 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
64 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
67 for (i
= 0; i
< n
; i
++)
70 gcc_assert (TYPE_PRECISION (type
) <= MAX_BITSIZE_MODE_ANY_INT
);
71 return wide_int_to_tree
72 (type
, wide_int::from_array (a
, n
, TYPE_PRECISION (type
)));
75 static GTY(()) tree vector_inner_type
;
76 static GTY(()) tree vector_last_type
;
77 static GTY(()) int vector_last_nunits
;
79 /* Return a suitable vector types made of SUBPARTS units each of mode
80 "word_mode" (the global variable). */
82 build_word_mode_vector_type (int nunits
)
84 if (!vector_inner_type
)
85 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
86 else if (vector_last_nunits
== nunits
)
88 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
89 return vector_last_type
;
92 /* We build a new type, but we canonicalize it nevertheless,
93 because it still saves some memory. */
94 vector_last_nunits
= nunits
;
95 vector_last_type
= type_hash_canon (nunits
,
96 build_vector_type (vector_inner_type
,
98 return vector_last_type
;
101 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
102 tree
, tree
, tree
, tree
, tree
, enum tree_code
,
106 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
107 tree t
, tree bitsize
, tree bitpos
)
109 if (TREE_CODE (t
) == SSA_NAME
)
111 gimple
*def_stmt
= SSA_NAME_DEF_STMT (t
);
112 if (is_gimple_assign (def_stmt
)
113 && (gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
115 && gimple_assign_rhs_code (def_stmt
) == CONSTRUCTOR
)))
116 t
= gimple_assign_rhs1 (def_stmt
);
120 if (TREE_CODE (type
) == BOOLEAN_TYPE
)
123 = build_nonstandard_integer_type (tree_to_uhwi (bitsize
), 0);
124 tree field
= gimplify_build3 (gsi
, BIT_FIELD_REF
, itype
, t
,
126 return gimplify_build2 (gsi
, NE_EXPR
, type
, field
,
127 build_zero_cst (itype
));
130 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
133 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
137 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
138 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
139 enum tree_code code
, tree type ATTRIBUTE_UNUSED
)
141 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
142 return gimplify_build1 (gsi
, code
, inner_type
, a
);
146 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
147 tree bitpos
, tree bitsize
, enum tree_code code
,
148 tree type ATTRIBUTE_UNUSED
)
150 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
151 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
152 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
153 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
154 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
157 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
159 INNER_TYPE is the type of A and B elements
161 returned expression is of signed integer type with the
162 size equal to the size of INNER_TYPE. */
164 do_compare (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
165 tree bitpos
, tree bitsize
, enum tree_code code
, tree type
)
167 tree stype
= TREE_TYPE (type
);
168 tree cst_false
= build_zero_cst (stype
);
169 tree cst_true
= build_all_ones_cst (stype
);
172 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
173 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
175 cmp
= build2 (code
, boolean_type_node
, a
, b
);
176 return gimplify_build3 (gsi
, COND_EXPR
, stype
, cmp
, cst_true
, cst_false
);
179 /* Expand vector addition to scalars. This does bit twiddling
180 in order to increase parallelism:
182 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
185 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
186 (a ^ ~b) & 0x80808080
188 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
190 This optimization should be done only if 4 vector items or more
193 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
194 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
195 enum tree_code code
, tree type ATTRIBUTE_UNUSED
)
197 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
198 unsigned HOST_WIDE_INT max
;
199 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
201 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
202 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
203 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
205 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
206 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
208 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
209 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
210 if (code
== PLUS_EXPR
)
211 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
214 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
215 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
218 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
219 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
220 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
224 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
225 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
226 tree bitsize ATTRIBUTE_UNUSED
,
227 enum tree_code code ATTRIBUTE_UNUSED
,
228 tree type ATTRIBUTE_UNUSED
)
230 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
232 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
234 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
235 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
236 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
238 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
240 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
241 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
242 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
243 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
244 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
247 /* Expand a vector operation to scalars, by using many operations
248 whose type is the vector type's inner type. */
250 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
251 tree type
, tree inner_type
,
252 tree a
, tree b
, enum tree_code code
)
254 vec
<constructor_elt
, va_gc
> *v
;
255 tree part_width
= TYPE_SIZE (inner_type
);
256 tree index
= bitsize_int (0);
257 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
258 int delta
= tree_to_uhwi (part_width
)
259 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type
)));
261 location_t loc
= gimple_location (gsi_stmt (*gsi
));
263 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi
)), type
))
264 warning_at (loc
, OPT_Wvector_operation_performance
,
265 "vector operation will be expanded piecewise");
267 warning_at (loc
, OPT_Wvector_operation_performance
,
268 "vector operation will be expanded in parallel");
270 vec_alloc (v
, (nunits
+ delta
- 1) / delta
);
271 for (i
= 0; i
< nunits
;
272 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
))
274 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
, type
);
275 constructor_elt ce
= {NULL_TREE
, result
};
279 return build_constructor (type
, v
);
282 /* Expand a vector operation to scalars with the freedom to use
283 a scalar integer type, or to use a different size for the items
284 in the vector type. */
286 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
290 tree result
, compute_type
;
292 int n_words
= tree_to_uhwi (TYPE_SIZE_UNIT (type
)) / UNITS_PER_WORD
;
293 location_t loc
= gimple_location (gsi_stmt (*gsi
));
295 /* We have three strategies. If the type is already correct, just do
296 the operation an element at a time. Else, if the vector is wider than
297 one word, do it a word at a time; finally, if the vector is smaller
298 than one word, do it as a scalar. */
299 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
300 return expand_vector_piecewise (gsi
, f
,
301 type
, TREE_TYPE (type
),
303 else if (n_words
> 1)
305 tree word_type
= build_word_mode_vector_type (n_words
);
306 result
= expand_vector_piecewise (gsi
, f
,
307 word_type
, TREE_TYPE (word_type
),
309 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
314 /* Use a single scalar operation with a mode no wider than word_mode. */
315 mode
= mode_for_size (tree_to_uhwi (TYPE_SIZE (type
)), MODE_INT
, 0);
316 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
317 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
, type
);
318 warning_at (loc
, OPT_Wvector_operation_performance
,
319 "vector operation will be expanded with a "
320 "single scalar operation");
326 /* Expand a vector operation to scalars; for integer types we can use
327 special bit twiddling tricks to do the sums a word at a time, using
328 function F_PARALLEL instead of F. These tricks are done only if
329 they can process at least four items, that is, only if the vector
330 holds at least four items and if a word can hold four items. */
332 expand_vector_addition (gimple_stmt_iterator
*gsi
,
333 elem_op_func f
, elem_op_func f_parallel
,
334 tree type
, tree a
, tree b
, enum tree_code code
)
336 int parts_per_word
= UNITS_PER_WORD
337 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
339 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
340 && parts_per_word
>= 4
341 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
342 return expand_vector_parallel (gsi
, f_parallel
,
345 return expand_vector_piecewise (gsi
, f
,
346 type
, TREE_TYPE (type
),
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
     VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
355 expand_vector_comparison (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
356 tree op1
, enum tree_code code
)
359 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0
), type
)
360 && !expand_vec_cond_expr_p (type
, TREE_TYPE (op0
)))
361 t
= expand_vector_piecewise (gsi
, do_compare
, type
,
362 TREE_TYPE (TREE_TYPE (op0
)), op0
, op1
, code
);
369 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
370 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
371 the result if successful, otherwise return NULL_TREE. */
373 add_rshift (gimple_stmt_iterator
*gsi
, tree type
, tree op0
, int *shiftcnts
)
376 unsigned int i
, nunits
= TYPE_VECTOR_SUBPARTS (type
);
377 bool scalar_shift
= true;
379 for (i
= 1; i
< nunits
; i
++)
381 if (shiftcnts
[i
] != shiftcnts
[0])
382 scalar_shift
= false;
385 if (scalar_shift
&& shiftcnts
[0] == 0)
390 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_scalar
);
391 if (op
!= unknown_optab
392 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
393 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
394 build_int_cst (NULL_TREE
, shiftcnts
[0]));
397 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
398 if (op
!= unknown_optab
399 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
401 tree
*vec
= XALLOCAVEC (tree
, nunits
);
402 for (i
= 0; i
< nunits
; i
++)
403 vec
[i
] = build_int_cst (TREE_TYPE (type
), shiftcnts
[i
]);
404 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
405 build_vector (type
, vec
));
411 /* Try to expand integer vector division by constant using
412 widening multiply, shifts and additions. */
414 expand_vector_divmod (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
415 tree op1
, enum tree_code code
)
417 bool use_pow2
= true;
418 bool has_vector_shift
= true;
419 int mode
= -1, this_mode
;
420 int pre_shift
= -1, post_shift
;
421 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (type
);
422 int *shifts
= XALLOCAVEC (int, nunits
* 4);
423 int *pre_shifts
= shifts
+ nunits
;
424 int *post_shifts
= pre_shifts
+ nunits
;
425 int *shift_temps
= post_shifts
+ nunits
;
426 unsigned HOST_WIDE_INT
*mulc
= XALLOCAVEC (unsigned HOST_WIDE_INT
, nunits
);
427 int prec
= TYPE_PRECISION (TREE_TYPE (type
));
430 signop sign_p
= TYPE_SIGN (TREE_TYPE (type
));
431 unsigned HOST_WIDE_INT mask
= GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type
)));
433 tree cur_op
, mulcst
, tem
;
436 if (prec
> HOST_BITS_PER_WIDE_INT
)
439 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
440 if (op
== unknown_optab
441 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
442 has_vector_shift
= false;
444 /* Analysis phase. Determine if all op1 elements are either power
445 of two and it is possible to expand it using shifts (or for remainder
446 using masking). Additionally compute the multiplicative constants
447 and pre and post shifts if the division is to be expanded using
448 widening or high part multiplication plus shifts. */
449 for (i
= 0; i
< nunits
; i
++)
451 tree cst
= VECTOR_CST_ELT (op1
, i
);
452 unsigned HOST_WIDE_INT ml
;
454 if (TREE_CODE (cst
) != INTEGER_CST
|| integer_zerop (cst
))
460 && (!integer_pow2p (cst
) || tree_int_cst_sgn (cst
) != 1))
464 shifts
[i
] = tree_log2 (cst
);
465 if (shifts
[i
] != shifts
[0]
466 && code
== TRUNC_DIV_EXPR
467 && !has_vector_shift
)
472 if (sign_p
== UNSIGNED
)
474 unsigned HOST_WIDE_INT mh
;
475 unsigned HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
) & mask
;
477 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
478 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
487 /* Find a suitable multiplier and right shift count
488 instead of multiplying with D. */
489 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
491 /* If the suggested multiplier is more than SIZE bits, we can
492 do better for even divisors, using an initial right shift. */
493 if ((mh
!= 0 && (d
& 1) == 0)
494 || (!has_vector_shift
&& pre_shift
!= -1))
496 if (has_vector_shift
)
497 pre_shift
= floor_log2 (d
& -d
);
498 else if (pre_shift
== -1)
501 for (j
= 0; j
< nunits
; j
++)
503 tree cst2
= VECTOR_CST_ELT (op1
, j
);
504 unsigned HOST_WIDE_INT d2
;
507 if (!tree_fits_uhwi_p (cst2
))
509 d2
= tree_to_uhwi (cst2
) & mask
;
512 this_pre_shift
= floor_log2 (d2
& -d2
);
513 if (pre_shift
== -1 || this_pre_shift
< pre_shift
)
514 pre_shift
= this_pre_shift
;
516 if (i
!= 0 && pre_shift
!= 0)
526 if ((d
>> pre_shift
) <= 1)
531 mh
= choose_multiplier (d
>> pre_shift
, prec
,
533 &ml
, &post_shift
, &dummy_int
);
535 pre_shifts
[i
] = pre_shift
;
545 HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
);
546 unsigned HOST_WIDE_INT abs_d
;
551 /* Since d might be INT_MIN, we have to cast to
552 unsigned HOST_WIDE_INT before negating to avoid
553 undefined signed overflow. */
555 ? (unsigned HOST_WIDE_INT
) d
556 : - (unsigned HOST_WIDE_INT
) d
);
558 /* n rem d = n rem -d */
559 if (code
== TRUNC_MOD_EXPR
&& d
< 0)
561 else if (abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
563 /* This case is not handled correctly below. */
573 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
,
574 &post_shift
, &dummy_int
);
575 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
577 this_mode
= 4 + (d
< 0);
578 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
581 this_mode
= 2 + (d
< 0);
584 post_shifts
[i
] = post_shift
;
585 if ((i
&& !has_vector_shift
&& post_shifts
[0] != post_shift
)
586 || post_shift
>= prec
587 || pre_shifts
[i
] >= prec
)
592 else if (mode
!= this_mode
)
596 vec
= XALLOCAVEC (tree
, nunits
);
600 tree addend
= NULL_TREE
;
601 if (sign_p
== SIGNED
)
605 /* Both division and remainder sequences need
606 op0 < 0 ? mask : 0 computed. It can be either computed as
607 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
608 if none of the shifts is 0, or as the conditional. */
609 for (i
= 0; i
< nunits
; i
++)
613 = build_vector_type (build_nonstandard_integer_type (prec
, 1),
615 if (i
== nunits
&& TYPE_MODE (uns_type
) == TYPE_MODE (type
))
617 for (i
= 0; i
< nunits
; i
++)
618 shift_temps
[i
] = prec
- 1;
619 cur_op
= add_rshift (gsi
, type
, op0
, shift_temps
);
620 if (cur_op
!= NULL_TREE
)
622 cur_op
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
624 for (i
= 0; i
< nunits
; i
++)
625 shift_temps
[i
] = prec
- shifts
[i
];
626 cur_op
= add_rshift (gsi
, uns_type
, cur_op
, shift_temps
);
627 if (cur_op
!= NULL_TREE
)
628 addend
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
632 if (addend
== NULL_TREE
633 && expand_vec_cond_expr_p (type
, type
))
635 tree zero
, cst
, cond
, mask_type
;
638 mask_type
= build_same_sized_truth_vector_type (type
);
639 zero
= build_zero_cst (type
);
640 cond
= build2 (LT_EXPR
, mask_type
, op0
, zero
);
641 for (i
= 0; i
< nunits
; i
++)
642 vec
[i
] = build_int_cst (TREE_TYPE (type
),
643 ((unsigned HOST_WIDE_INT
) 1
645 cst
= build_vector (type
, vec
);
646 addend
= make_ssa_name (type
);
647 stmt
= gimple_build_assign (addend
, VEC_COND_EXPR
, cond
,
649 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
652 if (code
== TRUNC_DIV_EXPR
)
654 if (sign_p
== UNSIGNED
)
656 /* q = op0 >> shift; */
657 cur_op
= add_rshift (gsi
, type
, op0
, shifts
);
658 if (cur_op
!= NULL_TREE
)
661 else if (addend
!= NULL_TREE
)
663 /* t1 = op0 + addend;
665 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
666 if (op
!= unknown_optab
667 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
669 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
, addend
);
670 cur_op
= add_rshift (gsi
, type
, cur_op
, shifts
);
671 if (cur_op
!= NULL_TREE
)
679 for (i
= 0; i
< nunits
; i
++)
680 vec
[i
] = build_int_cst (TREE_TYPE (type
),
681 ((unsigned HOST_WIDE_INT
) 1
683 mask
= build_vector (type
, vec
);
684 op
= optab_for_tree_code (BIT_AND_EXPR
, type
, optab_default
);
685 if (op
!= unknown_optab
686 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
688 if (sign_p
== UNSIGNED
)
689 /* r = op0 & mask; */
690 return gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, op0
, mask
);
691 else if (addend
!= NULL_TREE
)
693 /* t1 = op0 + addend;
696 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
697 if (op
!= unknown_optab
698 && optab_handler (op
, TYPE_MODE (type
))
701 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
,
703 cur_op
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
,
705 op
= optab_for_tree_code (MINUS_EXPR
, type
,
707 if (op
!= unknown_optab
708 && optab_handler (op
, TYPE_MODE (type
))
710 return gimplify_build2 (gsi
, MINUS_EXPR
, type
,
718 if (mode
== -2 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
721 if (!can_mult_highpart_p (TYPE_MODE (type
), TYPE_UNSIGNED (type
)))
729 gcc_assert (sign_p
== UNSIGNED
);
730 /* t1 = oprnd0 >> pre_shift;
732 q = t2 >> post_shift; */
733 cur_op
= add_rshift (gsi
, type
, cur_op
, pre_shifts
);
734 if (cur_op
== NULL_TREE
)
738 gcc_assert (sign_p
== UNSIGNED
);
739 for (i
= 0; i
< nunits
; i
++)
749 gcc_assert (sign_p
== SIGNED
);
750 for (i
= 0; i
< nunits
; i
++)
751 shift_temps
[i
] = prec
- 1;
757 for (i
= 0; i
< nunits
; i
++)
758 vec
[i
] = build_int_cst (TREE_TYPE (type
), mulc
[i
]);
759 mulcst
= build_vector (type
, vec
);
761 cur_op
= gimplify_build2 (gsi
, MULT_HIGHPART_EXPR
, type
, cur_op
, mulcst
);
766 /* t1 = oprnd0 >> pre_shift;
768 q = t2 >> post_shift; */
769 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
772 /* t1 = oprnd0 h* ml;
776 q = t4 >> (post_shift - 1); */
777 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
778 if (op
== unknown_optab
779 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
781 tem
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, cur_op
);
782 tem
= add_rshift (gsi
, type
, tem
, shift_temps
);
783 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
784 if (op
== unknown_optab
785 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
787 tem
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, tem
);
788 cur_op
= add_rshift (gsi
, type
, tem
, post_shifts
);
789 if (cur_op
== NULL_TREE
)
796 /* t1 = oprnd0 h* ml;
797 t2 = t1; [ iff (mode & 2) != 0 ]
798 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
799 t3 = t2 >> post_shift;
800 t4 = oprnd0 >> (prec - 1);
801 q = t3 - t4; [ iff (mode & 1) == 0 ]
802 q = t4 - t3; [ iff (mode & 1) != 0 ] */
805 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
806 if (op
== unknown_optab
807 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
809 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, op0
);
811 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
812 if (cur_op
== NULL_TREE
)
814 tem
= add_rshift (gsi
, type
, op0
, shift_temps
);
815 if (tem
== NULL_TREE
)
817 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
818 if (op
== unknown_optab
819 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
822 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, cur_op
, tem
);
824 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, tem
, cur_op
);
830 if (code
== TRUNC_DIV_EXPR
)
833 /* We divided. Now finish by:
836 op
= optab_for_tree_code (MULT_EXPR
, type
, optab_default
);
837 if (op
== unknown_optab
838 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
840 tem
= gimplify_build2 (gsi
, MULT_EXPR
, type
, cur_op
, op1
);
841 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
842 if (op
== unknown_optab
843 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
845 return gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, tem
);
848 /* Expand a vector condition to scalars, by using many conditions
849 on the vector's elements. */
851 expand_vector_condition (gimple_stmt_iterator
*gsi
)
853 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
854 tree type
= gimple_expr_type (stmt
);
855 tree a
= gimple_assign_rhs1 (stmt
);
858 bool a_is_comparison
= false;
859 tree b
= gimple_assign_rhs2 (stmt
);
860 tree c
= gimple_assign_rhs3 (stmt
);
861 vec
<constructor_elt
, va_gc
> *v
;
863 tree inner_type
= TREE_TYPE (type
);
864 tree cond_type
= TREE_TYPE (TREE_TYPE (a
));
865 tree comp_inner_type
= cond_type
;
866 tree width
= TYPE_SIZE (inner_type
);
867 tree index
= bitsize_int (0);
868 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
870 location_t loc
= gimple_location (gsi_stmt (*gsi
));
872 if (!is_gimple_val (a
))
874 gcc_assert (COMPARISON_CLASS_P (a
));
875 a_is_comparison
= true;
876 a1
= TREE_OPERAND (a
, 0);
877 a2
= TREE_OPERAND (a
, 1);
878 comp_inner_type
= TREE_TYPE (TREE_TYPE (a1
));
881 if (expand_vec_cond_expr_p (type
, TREE_TYPE (a1
)))
884 /* TODO: try and find a smaller vector type. */
886 warning_at (loc
, OPT_Wvector_operation_performance
,
887 "vector condition will be expanded piecewise");
889 vec_alloc (v
, nunits
);
890 for (i
= 0; i
< nunits
;
891 i
++, index
= int_const_binop (PLUS_EXPR
, index
, width
))
894 tree bb
= tree_vec_extract (gsi
, inner_type
, b
, width
, index
);
895 tree cc
= tree_vec_extract (gsi
, inner_type
, c
, width
, index
);
898 tree aa1
= tree_vec_extract (gsi
, comp_inner_type
, a1
, width
, index
);
899 tree aa2
= tree_vec_extract (gsi
, comp_inner_type
, a2
, width
, index
);
900 aa
= build2 (TREE_CODE (a
), cond_type
, aa1
, aa2
);
903 aa
= tree_vec_extract (gsi
, cond_type
, a
, width
, index
);
904 result
= gimplify_build3 (gsi
, COND_EXPR
, inner_type
, aa
, bb
, cc
);
905 constructor_elt ce
= {NULL_TREE
, result
};
909 constr
= build_constructor (type
, v
);
910 gimple_assign_set_rhs_from_tree (gsi
, constr
);
911 update_stmt (gsi_stmt (*gsi
));
915 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
916 gassign
*assign
, enum tree_code code
)
918 machine_mode compute_mode
= TYPE_MODE (compute_type
);
920 /* If the compute mode is not a vector mode (hence we are not decomposing
921 a BLKmode vector to smaller, hardware-supported vectors), we may want
922 to expand the operations in parallel. */
923 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
924 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
925 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
926 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
927 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
928 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
933 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
934 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
935 gimple_assign_rhs1 (assign
),
936 gimple_assign_rhs2 (assign
), code
);
940 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
941 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
942 gimple_assign_rhs1 (assign
),
949 return expand_vector_parallel (gsi
, do_binop
, type
,
950 gimple_assign_rhs1 (assign
),
951 gimple_assign_rhs2 (assign
), code
);
954 return expand_vector_parallel (gsi
, do_unop
, type
,
955 gimple_assign_rhs1 (assign
),
972 tree rhs1
= gimple_assign_rhs1 (assign
);
973 tree rhs2
= gimple_assign_rhs2 (assign
);
975 return expand_vector_comparison (gsi
, type
, rhs1
, rhs2
, code
);
981 tree rhs1
= gimple_assign_rhs1 (assign
);
982 tree rhs2
= gimple_assign_rhs2 (assign
);
986 || !VECTOR_INTEGER_TYPE_P (type
)
987 || TREE_CODE (rhs2
) != VECTOR_CST
988 || !VECTOR_MODE_P (TYPE_MODE (type
)))
991 ret
= expand_vector_divmod (gsi
, type
, rhs1
, rhs2
, code
);
992 if (ret
!= NULL_TREE
)
1001 if (TREE_CODE_CLASS (code
) == tcc_unary
)
1002 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
1003 gimple_assign_rhs1 (assign
),
1006 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
1007 gimple_assign_rhs1 (assign
),
1008 gimple_assign_rhs2 (assign
), code
);
/* Try to optimize
     a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   into
     _9 = { b_7, b_7, b_7, b_7 };
     a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */
1020 optimize_vector_constructor (gimple_stmt_iterator
*gsi
)
1022 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1023 tree lhs
= gimple_assign_lhs (stmt
);
1024 tree rhs
= gimple_assign_rhs1 (stmt
);
1025 tree type
= TREE_TYPE (rhs
);
1026 unsigned int i
, j
, nelts
= TYPE_VECTOR_SUBPARTS (type
);
1027 bool all_same
= true;
1028 constructor_elt
*elt
;
1031 tree base
= NULL_TREE
;
1034 if (nelts
<= 2 || CONSTRUCTOR_NELTS (rhs
) != nelts
)
1036 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
1037 if (op
== unknown_optab
1038 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
1040 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
1041 if (TREE_CODE (elt
->value
) != SSA_NAME
1042 || TREE_CODE (TREE_TYPE (elt
->value
)) == VECTOR_TYPE
)
1046 tree this_base
= elt
->value
;
1047 if (this_base
!= CONSTRUCTOR_ELT (rhs
, 0)->value
)
1049 for (j
= 0; j
< nelts
+ 1; j
++)
1051 g
= SSA_NAME_DEF_STMT (this_base
);
1052 if (is_gimple_assign (g
)
1053 && gimple_assign_rhs_code (g
) == PLUS_EXPR
1054 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
1055 && TREE_CODE (gimple_assign_rhs1 (g
)) == SSA_NAME
1056 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
1057 this_base
= gimple_assign_rhs1 (g
);
1063 else if (this_base
!= base
)
1068 cst
= XALLOCAVEC (tree
, nelts
);
1069 for (i
= 0; i
< nelts
; i
++)
1071 tree this_base
= CONSTRUCTOR_ELT (rhs
, i
)->value
;;
1072 cst
[i
] = build_zero_cst (TREE_TYPE (base
));
1073 while (this_base
!= base
)
1075 g
= SSA_NAME_DEF_STMT (this_base
);
1076 cst
[i
] = fold_binary (PLUS_EXPR
, TREE_TYPE (base
),
1077 cst
[i
], gimple_assign_rhs2 (g
));
1078 if (cst
[i
] == NULL_TREE
1079 || TREE_CODE (cst
[i
]) != INTEGER_CST
1080 || TREE_OVERFLOW (cst
[i
]))
1082 this_base
= gimple_assign_rhs1 (g
);
1085 for (i
= 0; i
< nelts
; i
++)
1086 CONSTRUCTOR_ELT (rhs
, i
)->value
= base
;
1087 g
= gimple_build_assign (make_ssa_name (type
), rhs
);
1088 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1089 g
= gimple_build_assign (lhs
, PLUS_EXPR
, gimple_assign_lhs (g
),
1090 build_vector (type
, cst
));
1091 gsi_replace (gsi
, g
, false);
1094 /* Return a type for the widest vector mode whose components are of type
1095 TYPE, or NULL_TREE if none is found. */
1098 type_for_widest_vector_mode (tree type
, optab op
)
1100 machine_mode inner_mode
= TYPE_MODE (type
);
1101 machine_mode best_mode
= VOIDmode
, mode
;
1102 int best_nunits
= 0;
1104 if (SCALAR_FLOAT_MODE_P (inner_mode
))
1105 mode
= MIN_MODE_VECTOR_FLOAT
;
1106 else if (SCALAR_FRACT_MODE_P (inner_mode
))
1107 mode
= MIN_MODE_VECTOR_FRACT
;
1108 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
1109 mode
= MIN_MODE_VECTOR_UFRACT
;
1110 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
1111 mode
= MIN_MODE_VECTOR_ACCUM
;
1112 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
1113 mode
= MIN_MODE_VECTOR_UACCUM
;
1115 mode
= MIN_MODE_VECTOR_INT
;
1117 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
1118 if (GET_MODE_INNER (mode
) == inner_mode
1119 && GET_MODE_NUNITS (mode
) > best_nunits
1120 && optab_handler (op
, mode
) != CODE_FOR_nothing
)
1121 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
1123 if (best_mode
== VOIDmode
)
1126 return build_vector_type_for_mode (type
, best_mode
);
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
1141 vector_element (gimple_stmt_iterator
*gsi
, tree vect
, tree idx
, tree
*ptmpvec
)
1143 tree vect_type
, vect_elt_type
;
1147 bool need_asgn
= true;
1148 unsigned int elements
;
1150 vect_type
= TREE_TYPE (vect
);
1151 vect_elt_type
= TREE_TYPE (vect_type
);
1152 elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1154 if (TREE_CODE (idx
) == INTEGER_CST
)
1156 unsigned HOST_WIDE_INT index
;
1158 /* Given that we're about to compute a binary modulus,
1159 we don't care about the high bits of the value. */
1160 index
= TREE_INT_CST_LOW (idx
);
1161 if (!tree_fits_uhwi_p (idx
) || index
>= elements
)
1163 index
&= elements
- 1;
1164 idx
= build_int_cst (TREE_TYPE (idx
), index
);
1167 /* When lowering a vector statement sequence do some easy
1168 simplification by looking through intermediate vector results. */
1169 if (TREE_CODE (vect
) == SSA_NAME
)
1171 gimple
*def_stmt
= SSA_NAME_DEF_STMT (vect
);
1172 if (is_gimple_assign (def_stmt
)
1173 && (gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
1174 || gimple_assign_rhs_code (def_stmt
) == CONSTRUCTOR
))
1175 vect
= gimple_assign_rhs1 (def_stmt
);
1178 if (TREE_CODE (vect
) == VECTOR_CST
)
1179 return VECTOR_CST_ELT (vect
, index
);
1180 else if (TREE_CODE (vect
) == CONSTRUCTOR
1181 && (CONSTRUCTOR_NELTS (vect
) == 0
1182 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect
, 0)->value
))
1185 if (index
< CONSTRUCTOR_NELTS (vect
))
1186 return CONSTRUCTOR_ELT (vect
, index
)->value
;
1187 return build_zero_cst (vect_elt_type
);
1191 tree size
= TYPE_SIZE (vect_elt_type
);
1192 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (index
),
1194 return fold_build3 (BIT_FIELD_REF
, vect_elt_type
, vect
, size
, pos
);
1199 tmpvec
= create_tmp_var (vect_type
, "vectmp");
1201 tmpvec
= *ptmpvec
= create_tmp_var (vect_type
, "vectmp");
1210 TREE_ADDRESSABLE (tmpvec
) = 1;
1211 asgn
= gimple_build_assign (tmpvec
, vect
);
1212 gsi_insert_before (gsi
, asgn
, GSI_SAME_STMT
);
1215 arraytype
= build_array_type_nelts (vect_elt_type
, elements
);
1216 return build4 (ARRAY_REF
, vect_elt_type
,
1217 build1 (VIEW_CONVERT_EXPR
, arraytype
, tmpvec
),
1218 idx
, NULL_TREE
, NULL_TREE
);
1221 /* Check if VEC_PERM_EXPR within the given setting is supported
1222 by hardware, or lower it piecewise.
1224 When VEC_PERM_EXPR has the same first and second operands:
1225 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1226 {v0[mask[0]], v0[mask[1]], ...}
1227 MASK and V0 must have the same number of elements.
1229 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1230 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1231 V0 and V1 must have the same type. MASK, V0, V1 must have the
1232 same number of arguments. */
1235 lower_vec_perm (gimple_stmt_iterator
*gsi
)
1237 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1238 tree mask
= gimple_assign_rhs3 (stmt
);
1239 tree vec0
= gimple_assign_rhs1 (stmt
);
1240 tree vec1
= gimple_assign_rhs2 (stmt
);
1241 tree vect_type
= TREE_TYPE (vec0
);
1242 tree mask_type
= TREE_TYPE (mask
);
1243 tree vect_elt_type
= TREE_TYPE (vect_type
);
1244 tree mask_elt_type
= TREE_TYPE (mask_type
);
1245 unsigned int elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1246 vec
<constructor_elt
, va_gc
> *v
;
1247 tree constr
, t
, si
, i_val
;
1248 tree vec0tmp
= NULL_TREE
, vec1tmp
= NULL_TREE
, masktmp
= NULL_TREE
;
1249 bool two_operand_p
= !operand_equal_p (vec0
, vec1
, 0);
1250 location_t loc
= gimple_location (gsi_stmt (*gsi
));
1253 if (TREE_CODE (mask
) == SSA_NAME
)
1255 gimple
*def_stmt
= SSA_NAME_DEF_STMT (mask
);
1256 if (is_gimple_assign (def_stmt
)
1257 && gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
)
1258 mask
= gimple_assign_rhs1 (def_stmt
);
1261 if (TREE_CODE (mask
) == VECTOR_CST
)
1263 unsigned char *sel_int
= XALLOCAVEC (unsigned char, elements
);
1265 for (i
= 0; i
< elements
; ++i
)
1266 sel_int
[i
] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask
, i
))
1267 & (2 * elements
- 1));
1269 if (can_vec_perm_p (TYPE_MODE (vect_type
), false, sel_int
))
1271 gimple_assign_set_rhs3 (stmt
, mask
);
1275 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1276 vector as VEC1 and a right element shift MASK. */
1277 if (optab_handler (vec_shr_optab
, TYPE_MODE (vect_type
))
1279 && TREE_CODE (vec1
) == VECTOR_CST
1280 && initializer_zerop (vec1
)
1282 && sel_int
[0] < elements
)
1284 for (i
= 1; i
< elements
; ++i
)
1286 unsigned int expected
= i
+ sel_int
[0];
1287 /* Indices into the second vector are all equivalent. */
1288 if (MIN (elements
, (unsigned) sel_int
[i
])
1289 != MIN (elements
, expected
))
1294 gimple_assign_set_rhs3 (stmt
, mask
);
1300 else if (can_vec_perm_p (TYPE_MODE (vect_type
), true, NULL
))
1303 warning_at (loc
, OPT_Wvector_operation_performance
,
1304 "vector shuffling operation will be expanded piecewise");
1306 vec_alloc (v
, elements
);
1307 for (i
= 0; i
< elements
; i
++)
1310 i_val
= vector_element (gsi
, mask
, si
, &masktmp
);
1312 if (TREE_CODE (i_val
) == INTEGER_CST
)
1314 unsigned HOST_WIDE_INT index
;
1316 index
= TREE_INT_CST_LOW (i_val
);
1317 if (!tree_fits_uhwi_p (i_val
) || index
>= elements
)
1318 i_val
= build_int_cst (mask_elt_type
, index
& (elements
- 1));
1320 if (two_operand_p
&& (index
& elements
) != 0)
1321 t
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1323 t
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1325 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
1326 true, GSI_SAME_STMT
);
1330 tree cond
= NULL_TREE
, v0_val
;
1334 cond
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1335 build_int_cst (mask_elt_type
, elements
));
1336 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1337 true, GSI_SAME_STMT
);
1340 i_val
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1341 build_int_cst (mask_elt_type
, elements
- 1));
1342 i_val
= force_gimple_operand_gsi (gsi
, i_val
, true, NULL_TREE
,
1343 true, GSI_SAME_STMT
);
1345 v0_val
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1346 v0_val
= force_gimple_operand_gsi (gsi
, v0_val
, true, NULL_TREE
,
1347 true, GSI_SAME_STMT
);
1353 v1_val
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1354 v1_val
= force_gimple_operand_gsi (gsi
, v1_val
, true, NULL_TREE
,
1355 true, GSI_SAME_STMT
);
1357 cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1358 cond
, build_zero_cst (mask_elt_type
));
1359 cond
= fold_build3 (COND_EXPR
, vect_elt_type
,
1360 cond
, v0_val
, v1_val
);
1361 t
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1362 true, GSI_SAME_STMT
);
1368 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, t
);
1371 constr
= build_constructor (vect_type
, v
);
1372 gimple_assign_set_rhs_from_tree (gsi
, constr
);
1373 update_stmt (gsi_stmt (*gsi
));
1376 /* If OP is a uniform vector return the element it is a splat from. */
1379 ssa_uniform_vector_p (tree op
)
1381 if (TREE_CODE (op
) == VECTOR_CST
1382 || TREE_CODE (op
) == CONSTRUCTOR
)
1383 return uniform_vector_p (op
);
1384 if (TREE_CODE (op
) == SSA_NAME
)
1386 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
1387 if (gimple_assign_single_p (def_stmt
))
1388 return uniform_vector_p (gimple_assign_rhs1 (def_stmt
));
1393 /* Return type in which CODE operation with optab OP can be
1397 get_compute_type (enum tree_code code
, optab op
, tree type
)
1399 /* For very wide vectors, try using a smaller vector mode. */
1400 tree compute_type
= type
;
1402 && (!VECTOR_MODE_P (TYPE_MODE (type
))
1403 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
))
1405 tree vector_compute_type
1406 = type_for_widest_vector_mode (TREE_TYPE (type
), op
);
1407 if (vector_compute_type
!= NULL_TREE
1408 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
1409 < TYPE_VECTOR_SUBPARTS (compute_type
))
1410 && (optab_handler (op
, TYPE_MODE (vector_compute_type
))
1411 != CODE_FOR_nothing
))
1412 compute_type
= vector_compute_type
;
1415 /* If we are breaking a BLKmode vector into smaller pieces,
1416 type_for_widest_vector_mode has already looked into the optab,
1417 so skip these checks. */
1418 if (compute_type
== type
)
1420 machine_mode compute_mode
= TYPE_MODE (compute_type
);
1421 if (VECTOR_MODE_P (compute_mode
))
1423 if (op
&& optab_handler (op
, compute_mode
) != CODE_FOR_nothing
)
1424 return compute_type
;
1425 if (code
== MULT_HIGHPART_EXPR
1426 && can_mult_highpart_p (compute_mode
,
1427 TYPE_UNSIGNED (compute_type
)))
1428 return compute_type
;
1430 /* There is no operation in hardware, so fall back to scalars. */
1431 compute_type
= TREE_TYPE (type
);
1434 return compute_type
;
1437 /* Helper function of expand_vector_operations_1. Return number of
1438 vector elements for vector types or 1 for other types. */
1441 count_type_subparts (tree type
)
1443 return VECTOR_TYPE_P (type
) ? TYPE_VECTOR_SUBPARTS (type
) : 1;
1447 do_cond (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
1448 tree bitpos
, tree bitsize
, enum tree_code code
,
1449 tree type ATTRIBUTE_UNUSED
)
1451 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
1452 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
1453 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
1454 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
1455 tree cond
= gimple_assign_rhs1 (gsi_stmt (*gsi
));
1456 return gimplify_build3 (gsi
, code
, inner_type
, unshare_expr (cond
), a
, b
);
1459 /* Expand a vector COND_EXPR to scalars, piecewise. */
1461 expand_vector_scalar_condition (gimple_stmt_iterator
*gsi
)
1463 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1464 tree type
= gimple_expr_type (stmt
);
1465 tree compute_type
= get_compute_type (COND_EXPR
, mov_optab
, type
);
1466 machine_mode compute_mode
= TYPE_MODE (compute_type
);
1467 gcc_assert (compute_mode
!= BLKmode
);
1468 tree lhs
= gimple_assign_lhs (stmt
);
1469 tree rhs2
= gimple_assign_rhs2 (stmt
);
1470 tree rhs3
= gimple_assign_rhs3 (stmt
);
1473 /* If the compute mode is not a vector mode (hence we are not decomposing
1474 a BLKmode vector to smaller, hardware-supported vectors), we may want
1475 to expand the operations in parallel. */
1476 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
1477 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
1478 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
1479 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
1480 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
1481 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
1482 new_rhs
= expand_vector_parallel (gsi
, do_cond
, type
, rhs2
, rhs3
,
1485 new_rhs
= expand_vector_piecewise (gsi
, do_cond
, type
, compute_type
,
1486 rhs2
, rhs3
, COND_EXPR
);
1487 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
1488 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
1491 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1492 way to do it is change expand_vector_operation and its callees to
1493 return a tree_code, RHS1 and RHS2 instead of a tree. */
1494 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
1495 update_stmt (gsi_stmt (*gsi
));
1498 /* Process one statement. If we identify a vector operation, expand it. */
1501 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
1503 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
= NULL_TREE
;
1504 enum tree_code code
;
1505 optab op
= unknown_optab
;
1506 enum gimple_rhs_class rhs_class
;
1509 /* Only consider code == GIMPLE_ASSIGN. */
1510 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (*gsi
));
1514 code
= gimple_assign_rhs_code (stmt
);
1515 rhs_class
= get_gimple_rhs_class (code
);
1516 lhs
= gimple_assign_lhs (stmt
);
1518 if (code
== VEC_PERM_EXPR
)
1520 lower_vec_perm (gsi
);
1524 if (code
== VEC_COND_EXPR
)
1526 expand_vector_condition (gsi
);
1530 if (code
== COND_EXPR
1531 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt
))) == VECTOR_TYPE
1532 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt
))) == BLKmode
)
1534 expand_vector_scalar_condition (gsi
);
1538 if (code
== CONSTRUCTOR
1539 && TREE_CODE (lhs
) == SSA_NAME
1540 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs
)))
1541 && !gimple_clobber_p (stmt
)
1544 optimize_vector_constructor (gsi
);
1548 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
1551 rhs1
= gimple_assign_rhs1 (stmt
);
1552 type
= gimple_expr_type (stmt
);
1553 if (rhs_class
== GIMPLE_BINARY_RHS
)
1554 rhs2
= gimple_assign_rhs2 (stmt
);
1556 if (TREE_CODE (type
) != VECTOR_TYPE
)
1559 /* If the vector operation is operating on all same vector elements
1560 implement it with a scalar operation and a splat if the target
1561 supports the scalar operation. */
1562 tree srhs1
, srhs2
= NULL_TREE
;
1563 if ((srhs1
= ssa_uniform_vector_p (rhs1
)) != NULL_TREE
1564 && (rhs2
== NULL_TREE
1565 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2
))
1567 || (srhs2
= ssa_uniform_vector_p (rhs2
)) != NULL_TREE
)
1568 /* As we query direct optabs restrict to non-convert operations. */
1569 && TYPE_MODE (TREE_TYPE (type
)) == TYPE_MODE (TREE_TYPE (srhs1
)))
1571 op
= optab_for_tree_code (code
, TREE_TYPE (type
), optab_scalar
);
1572 if (op
>= FIRST_NORM_OPTAB
&& op
<= LAST_NORM_OPTAB
1573 && optab_handler (op
, TYPE_MODE (TREE_TYPE (type
))) != CODE_FOR_nothing
)
1575 tree slhs
= make_ssa_name (TREE_TYPE (srhs1
));
1576 gimple
*repl
= gimple_build_assign (slhs
, code
, srhs1
, srhs2
);
1577 gsi_insert_before (gsi
, repl
, GSI_SAME_STMT
);
1578 gimple_assign_set_rhs_from_tree (gsi
,
1579 build_vector_from_val (type
, slhs
));
1585 /* A scalar operation pretending to be a vector one. */
1586 if (VECTOR_BOOLEAN_TYPE_P (type
)
1587 && !VECTOR_MODE_P (TYPE_MODE (type
))
1588 && TYPE_MODE (type
) != BLKmode
)
1591 if (CONVERT_EXPR_CODE_P (code
)
1592 || code
== FLOAT_EXPR
1593 || code
== FIX_TRUNC_EXPR
1594 || code
== VIEW_CONVERT_EXPR
)
1597 /* The signedness is determined from input argument. */
1598 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
1599 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
1600 type
= TREE_TYPE (rhs1
);
1602 /* For widening/narrowing vector operations, the relevant type is of the
1603 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1604 calculated in the same way above. */
1605 if (code
== WIDEN_SUM_EXPR
1606 || code
== VEC_WIDEN_MULT_HI_EXPR
1607 || code
== VEC_WIDEN_MULT_LO_EXPR
1608 || code
== VEC_WIDEN_MULT_EVEN_EXPR
1609 || code
== VEC_WIDEN_MULT_ODD_EXPR
1610 || code
== VEC_UNPACK_HI_EXPR
1611 || code
== VEC_UNPACK_LO_EXPR
1612 || code
== VEC_PACK_TRUNC_EXPR
1613 || code
== VEC_PACK_SAT_EXPR
1614 || code
== VEC_PACK_FIX_TRUNC_EXPR
1615 || code
== VEC_WIDEN_LSHIFT_HI_EXPR
1616 || code
== VEC_WIDEN_LSHIFT_LO_EXPR
)
1617 type
= TREE_TYPE (rhs1
);
1619 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1621 if (code
== LSHIFT_EXPR
1622 || code
== RSHIFT_EXPR
1623 || code
== LROTATE_EXPR
1624 || code
== RROTATE_EXPR
)
1628 /* Check whether we have vector <op> {x,x,x,x} where x
1629 could be a scalar variable or a constant. Transform
1630 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1631 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1635 if ((first
= ssa_uniform_vector_p (rhs2
)) != NULL_TREE
)
1637 gimple_assign_set_rhs2 (stmt
, first
);
1643 opv
= optab_for_tree_code (code
, type
, optab_vector
);
1644 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1648 op
= optab_for_tree_code (code
, type
, optab_scalar
);
1650 compute_type
= get_compute_type (code
, op
, type
);
1651 if (compute_type
== type
)
1653 /* The rtl expander will expand vector/scalar as vector/vector
1654 if necessary. Pick one with wider vector type. */
1655 tree compute_vtype
= get_compute_type (code
, opv
, type
);
1656 if (count_type_subparts (compute_vtype
)
1657 > count_type_subparts (compute_type
))
1659 compute_type
= compute_vtype
;
1664 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1666 if (compute_type
== NULL_TREE
)
1667 compute_type
= get_compute_type (code
, op
, type
);
1668 if (compute_type
== type
)
1670 /* Before splitting vector rotates into scalar rotates,
1671 see if we can't use vector shifts and BIT_IOR_EXPR
1672 instead. For vector by vector rotates we'd also
1673 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1674 for now, fold doesn't seem to create such rotates anyway. */
1675 if (compute_type
== TREE_TYPE (type
)
1676 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1678 optab oplv
= vashl_optab
, opl
= ashl_optab
;
1679 optab oprv
= vlshr_optab
, opr
= lshr_optab
, opo
= ior_optab
;
1680 tree compute_lvtype
= get_compute_type (LSHIFT_EXPR
, oplv
, type
);
1681 tree compute_rvtype
= get_compute_type (RSHIFT_EXPR
, oprv
, type
);
1682 tree compute_otype
= get_compute_type (BIT_IOR_EXPR
, opo
, type
);
1683 tree compute_ltype
= get_compute_type (LSHIFT_EXPR
, opl
, type
);
1684 tree compute_rtype
= get_compute_type (RSHIFT_EXPR
, opr
, type
);
1685 /* The rtl expander will expand vector/scalar as vector/vector
1686 if necessary. Pick one with wider vector type. */
1687 if (count_type_subparts (compute_lvtype
)
1688 > count_type_subparts (compute_ltype
))
1690 compute_ltype
= compute_lvtype
;
1693 if (count_type_subparts (compute_rvtype
)
1694 > count_type_subparts (compute_rtype
))
1696 compute_rtype
= compute_rvtype
;
1699 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1701 compute_type
= compute_ltype
;
1702 if (count_type_subparts (compute_type
)
1703 > count_type_subparts (compute_rtype
))
1704 compute_type
= compute_rtype
;
1705 if (count_type_subparts (compute_type
)
1706 > count_type_subparts (compute_otype
))
1707 compute_type
= compute_otype
;
1708 /* Verify all 3 operations can be performed in that type. */
1709 if (compute_type
!= TREE_TYPE (type
))
1711 if (optab_handler (opl
, TYPE_MODE (compute_type
))
1713 || optab_handler (opr
, TYPE_MODE (compute_type
))
1715 || optab_handler (opo
, TYPE_MODE (compute_type
))
1716 == CODE_FOR_nothing
)
1717 compute_type
= TREE_TYPE (type
);
1723 op
= optab_for_tree_code (code
, type
, optab_default
);
1725 /* Optabs will try converting a negation into a subtraction, so
1726 look for it as well. TODO: negation of floating-point vectors
1727 might be turned into an exclusive OR toggling the sign bit. */
1728 if (op
== unknown_optab
1729 && code
== NEGATE_EXPR
1730 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
1731 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
1733 if (compute_type
== NULL_TREE
)
1734 compute_type
= get_compute_type (code
, op
, type
);
1735 if (compute_type
== type
)
1738 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
1740 /* Leave expression untouched for later expansion. */
1741 if (new_rhs
== NULL_TREE
)
1744 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
1745 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
1748 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1749 way to do it is change expand_vector_operation and its callees to
1750 return a tree_code, RHS1 and RHS2 instead of a tree. */
1751 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
1752 update_stmt (gsi_stmt (*gsi
));
1755 /* Use this to lower vector operations introduced by the vectorizer,
1756 if it may need the bit-twiddling tricks implemented in this file. */
1759 expand_vector_operations (void)
1761 gimple_stmt_iterator gsi
;
1763 bool cfg_changed
= false;
1765 FOR_EACH_BB_FN (bb
, cfun
)
1767 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1769 expand_vector_operations_1 (&gsi
);
1770 /* ??? If we do not cleanup EH then we will ICE in
1771 verification. But in reality we have created wrong-code
1772 as we did not properly transition EH info and edges to
1773 the piecewise computations. */
1774 if (maybe_clean_eh_stmt (gsi_stmt (gsi
))
1775 && gimple_purge_dead_eh_edges (bb
))
1780 return cfg_changed
? TODO_cleanup_cfg
: 0;
1785 const pass_data pass_data_lower_vector
=
1787 GIMPLE_PASS
, /* type */
1788 "veclower", /* name */
1789 OPTGROUP_VEC
, /* optinfo_flags */
1790 TV_NONE
, /* tv_id */
1791 PROP_cfg
, /* properties_required */
1792 PROP_gimple_lvec
, /* properties_provided */
1793 0, /* properties_destroyed */
1794 0, /* todo_flags_start */
1795 TODO_update_ssa
, /* todo_flags_finish */
1798 class pass_lower_vector
: public gimple_opt_pass
1801 pass_lower_vector (gcc::context
*ctxt
)
1802 : gimple_opt_pass (pass_data_lower_vector
, ctxt
)
1805 /* opt_pass methods: */
1806 virtual bool gate (function
*fun
)
1808 return !(fun
->curr_properties
& PROP_gimple_lvec
);
1811 virtual unsigned int execute (function
*)
1813 return expand_vector_operations ();
1816 }; // class pass_lower_vector
1821 make_pass_lower_vector (gcc::context
*ctxt
)
1823 return new pass_lower_vector (ctxt
);
1828 const pass_data pass_data_lower_vector_ssa
=
1830 GIMPLE_PASS
, /* type */
1831 "veclower2", /* name */
1832 OPTGROUP_VEC
, /* optinfo_flags */
1833 TV_NONE
, /* tv_id */
1834 PROP_cfg
, /* properties_required */
1835 PROP_gimple_lvec
, /* properties_provided */
1836 0, /* properties_destroyed */
1837 0, /* todo_flags_start */
1839 | TODO_cleanup_cfg
), /* todo_flags_finish */
1842 class pass_lower_vector_ssa
: public gimple_opt_pass
1845 pass_lower_vector_ssa (gcc::context
*ctxt
)
1846 : gimple_opt_pass (pass_data_lower_vector_ssa
, ctxt
)
1849 /* opt_pass methods: */
1850 opt_pass
* clone () { return new pass_lower_vector_ssa (m_ctxt
); }
1851 virtual unsigned int execute (function
*)
1853 return expand_vector_operations ();
1856 }; // class pass_lower_vector_ssa
1861 make_pass_lower_vector_ssa (gcc::context
*ctxt
)
1863 return new pass_lower_vector_ssa (ctxt
);
1866 #include "gt-tree-vect-generic.h"