/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "diagnostic.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"
static void expand_vector_operations_1 (gimple_stmt_iterator *);
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
          / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT) 1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}
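/* A worked example of what this helper produces, with illustrative
   values: for a 32-bit integer TYPE, an 8-bit INNER_TYPE and VALUE
   0x55, the low byte is replicated into every 8-bit slot, giving the
   constant 0x55555555.  The replication comes from the ~0 / mask
   multiplication above: ~0 / 0xff is 0x01010101..., and multiplying
   that by (value & 0xff) copies the byte into each position.  */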
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;
/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code);
static tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}
static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}
static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}
/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A and B elements.

   The returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
            tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
                (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
                          fold_build2 (code, boolean_type_node, a, b),
                          build_int_cst (comp_type, -1),
                          build_int_cst (comp_type, 0));
}
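/* For illustration: comparing two 32-bit elements with LT_EXPR through
   this helper yields a 32-bit signed integer that is -1 (all bits set)
   when the comparison holds and 0 otherwise, matching the all-ones /
   all-zeros mask convention that vector comparisons produce.  */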
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
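/* For illustration, consider 8-bit lanes packed into a 32-bit word.
   Clearing the top bit of every lane before the word-wide addition
   (the & 0x7f7f7f7f masks) guarantees that no carry can cross a lane
   boundary; the missing top bits are then patched back in with the
   final XOR.  For a single lane with a = 0xff and b = 0x01:
   (0x7f + 0x01) = 0x80, (a ^ b) & 0x80 = 0x80, and 0x80 ^ 0x80 = 0x00,
   the expected wrap-around result of 0xff + 0x01.  */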
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
              / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
                        tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
                  "vector operation will be expanded with a "
                  "single scalar operation");
    }

  return result;
}
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}
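/* For illustration (assuming an 8-byte word): adding two 16-element
   vectors of 8-bit integers gives parts_per_word == 8 and 16 subparts,
   so both checks above pass and the word-parallel f_parallel path
   (do_plus_minus) is taken; for 64-bit elements parts_per_word is 1,
   so the operation falls back to the piecewise path.  */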
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
        VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
                                 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}
/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
        scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
          && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
        return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
                                build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
        vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
                              build_vector (type, vec));
    }

  return NULL_TREE;
}
/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
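/* For illustration, with values chosen for an unsigned 8-bit element
   type: dividing by 3 can be rewritten as a highpart multiply plus a
   shift, q = (x h* 171) >> 1, because 171 = ceil (2^9 / 3); e.g. for
   x = 250 the high byte of 250 * 171 = 42750 is 166, and 166 >> 1 = 83
   = 250 / 3.  The analysis loop below computes such multipliers and
   shift counts for every element and gives up when the elements would
   need incompatible strategies.  */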
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
                      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
        return NULL_TREE;

      if (use_pow2
          && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
        use_pow2 = false;

      if (use_pow2)
        {
          shifts[i] = tree_log2 (cst);
          if (shifts[i] != shifts[0]
              && code == TRUNC_DIV_EXPR
              && !has_vector_shift)
            use_pow2 = false;
        }

      if (sign_p == UNSIGNED)
        {
          unsigned HOST_WIDE_INT mh;
          unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

          if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
            /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
            return NULL_TREE;

          /* Find a suitable multiplier and right shift count
             instead of multiplying with D.  */
          mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

          /* If the suggested multiplier is more than SIZE bits, we can
             do better for even divisors, using an initial right shift.  */
          if ((mh != 0 && (d & 1) == 0)
              || (!has_vector_shift && pre_shift != -1))
            {
              if (has_vector_shift)
                pre_shift = floor_log2 (d & -d);
              else if (pre_shift == -1)
                {
                  unsigned int j;
                  for (j = 0; j < nunits; j++)
                    {
                      tree cst2 = VECTOR_CST_ELT (op1, j);
                      unsigned HOST_WIDE_INT d2;
                      int this_pre_shift;

                      if (!tree_fits_uhwi_p (cst2))
                        return NULL_TREE;
                      d2 = tree_to_uhwi (cst2) & mask;
                      this_pre_shift = floor_log2 (d2 & -d2);
                      if (pre_shift == -1 || this_pre_shift < pre_shift)
                        pre_shift = this_pre_shift;
                    }
                  if (i != 0 && pre_shift != 0)
                    {
                      /* Restart.  */
                      i = -1U;
                      mode = -1;
                      continue;
                    }
                }
              if (pre_shift != 0)
                {
                  if ((d >> pre_shift) <= 1)
                    {
                      mode = -2;
                      continue;
                    }
                  mh = choose_multiplier (d >> pre_shift, prec,
                                          prec - pre_shift,
                                          &ml, &post_shift, &dummy_int);
                  pre_shifts[i] = pre_shift;
                }
            }
          if (!mh)
            this_mode = 0;
          else
            this_mode = 1;
        }
      else
        {
          HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
          unsigned HOST_WIDE_INT abs_d;

          /* Since d might be INT_MIN, we have to cast to
             unsigned HOST_WIDE_INT before negating to avoid
             undefined signed overflow.  */
          abs_d = (d >= 0
                   ? (unsigned HOST_WIDE_INT) d
                   : - (unsigned HOST_WIDE_INT) d);

          /* n rem d = n rem -d */
          if (code == TRUNC_MOD_EXPR && d < 0)
            d = abs_d;
          else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
            {
              /* This case is not handled correctly below.  */
              mode = -2;
              continue;
            }

          choose_multiplier (abs_d, prec, prec - 1, &ml,
                             &post_shift, &dummy_int);
          if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
            {
              this_mode = 4 + (d < 0);
              ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
            }
          else
            this_mode = 2 + (d < 0);
        }

      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
          || post_shift >= prec
          || pre_shifts[i] >= prec)
        this_mode = -2;

      if (i == 0)
        mode = this_mode;
      else if (mode != this_mode)
        mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
        {
          tree uns_type;

          /* Both division and remainder sequences need
             op0 < 0 ? mask : 0 computed.  It can be either computed as
             (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
             if none of the shifts is 0, or as the conditional.  */
          for (i = 0; i < nunits; i++)
            if (shifts[i] == 0)
              break;
          uns_type
            = build_vector_type (build_nonstandard_integer_type (prec, 1),
                                 nunits);
          if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
            {
              for (i = 0; i < nunits; i++)
                shift_temps[i] = prec - 1;
              cur_op = add_rshift (gsi, type, op0, shift_temps);
              if (cur_op != NULL_TREE)
                {
                  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
                                            uns_type, cur_op);
                  for (i = 0; i < nunits; i++)
                    shift_temps[i] = prec - shifts[i];
                  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
                  if (cur_op != NULL_TREE)
                    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
                                              type, cur_op);
                }
            }
          if (addend == NULL_TREE
              && expand_vec_cond_expr_p (type, type))
            {
              tree zero, cst, cond;
              gimple stmt;

              zero = build_zero_cst (type);
              cond = build2 (LT_EXPR, type, op0, zero);
              for (i = 0; i < nunits; i++)
                vec[i] = build_int_cst (TREE_TYPE (type),
                                        ((unsigned HOST_WIDE_INT) 1
                                         << shifts[i]) - 1);
              cst = build_vector (type, vec);
              addend = make_ssa_name (type, NULL);
              stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
                                                   cond, cst, zero);
              gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
            }
        }
      if (code == TRUNC_DIV_EXPR)
        {
          if (sign_p == UNSIGNED)
            {
              /* q = op0 >> shift;  */
              cur_op = add_rshift (gsi, type, op0, shifts);
              if (cur_op != NULL_TREE)
                return cur_op;
            }
          else if (addend != NULL_TREE)
            {
              /* t1 = op0 + addend;
                 q = t1 >> shift;  */
              op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
              if (op != unknown_optab
                  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
                {
                  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
                  cur_op = add_rshift (gsi, type, cur_op, shifts);
                  if (cur_op != NULL_TREE)
                    return cur_op;
                }
            }
        }
      else
        {
          tree mask;
          for (i = 0; i < nunits; i++)
            vec[i] = build_int_cst (TREE_TYPE (type),
                                    ((unsigned HOST_WIDE_INT) 1
                                     << shifts[i]) - 1);
          mask = build_vector (type, vec);
          op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
          if (op != unknown_optab
              && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
            {
              if (sign_p == UNSIGNED)
                /* r = op0 & mask;  */
                return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
              else if (addend != NULL_TREE)
                {
                  /* t1 = op0 + addend;
                     t2 = t1 & mask;
                     r = t2 - addend;  */
                  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
                  if (op != unknown_optab
                      && optab_handler (op, TYPE_MODE (type))
                         != CODE_FOR_nothing)
                    {
                      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
                                                addend);
                      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
                                                cur_op, mask);
                      op = optab_for_tree_code (MINUS_EXPR, type,
                                                optab_default);
                      if (op != unknown_optab
                          && optab_handler (op, TYPE_MODE (type))
                             != CODE_FOR_nothing)
                        return gimplify_build2 (gsi, MINUS_EXPR, type,
                                                cur_op, addend);
                    }
                }
            }
        }
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
         t2 = t1 h* ml;
         q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
        {
          shift_temps[i] = 1;
          post_shifts[i]--;
        }
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
        shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
         t2 = t1 h* ml;
         q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
         t2 = oprnd0 - t1;
         t3 = t2 >> 1;
         t4 = t1 + t3;
         q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
         t2 = t1; [ iff (mode & 2) != 0 ]
         t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
         t3 = t2 >> post_shift;
         t4 = oprnd0 >> (prec - 1);
         q = t3 - t4; [ iff (mode & 1) == 0 ]
         q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
        {
          op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
          if (op == unknown_optab
              || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
            return NULL_TREE;
          cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
        }
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
        return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      if ((mode & 1) == 0)
        cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
        cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
        {
          tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
          tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
          aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
        }
      else
        aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                         gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
                                         gimple_assign_rhs1 (assign),
                                         gimple_assign_rhs2 (assign), code);
        break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
                                         gimple_assign_rhs1 (assign),
                                         NULL_TREE, code);
        break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
                                       gimple_assign_rhs1 (assign),
                                       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
                                       gimple_assign_rhs1 (assign),
                                       NULL_TREE, code);

      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);

          return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
        }

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);
          tree ret;

          if (!optimize
              || !VECTOR_INTEGER_TYPE_P (type)
              || TREE_CODE (rhs2) != VECTOR_CST
              || !VECTOR_MODE_P (TYPE_MODE (type)))
            break;

          ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
          if (ret != NULL_TREE)
            return ret;
          break;
        }

      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
}
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because a vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */
static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  tree *cst;
  gimple g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
        || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
        tree this_base = elt->value;
        if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
          all_same = false;
        for (j = 0; j < nelts + 1; j++)
          {
            g = SSA_NAME_DEF_STMT (this_base);
            if (is_gimple_assign (g)
                && gimple_assign_rhs_code (g) == PLUS_EXPR
                && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
                && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
                && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
              this_base = gimple_assign_rhs1 (g);
            else
              break;
          }
        if (i == 0)
          base = this_base;
        else if (this_base != base)
          return;
      }
  if (all_same)
    return;
  cst = XALLOCAVEC (tree, nelts);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      cst[i] = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
        {
          g = SSA_NAME_DEF_STMT (this_base);
          cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
                                cst[i], gimple_assign_rhs2 (g));
          if (cst[i] == NULL_TREE
              || TREE_CODE (cst[i]) != INTEGER_CST
              || TREE_OVERFLOW (cst[i]))
            return;
          this_base = gimple_assign_rhs1 (g);
        }
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type, NULL), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, gimple_assign_lhs (g),
                                    build_vector (type, cst));
  gsi_replace (gsi, g, false);
}
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */
static tree
type_for_widest_vector_mode (tree type, optab op)
{
  enum machine_mode inner_mode = TYPE_MODE (type);
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
         we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
        {
          index &= elements - 1;
          idx = build_int_cst (TREE_TYPE (idx), index);
        }

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (vect);
          if (is_gimple_assign (def_stmt)
              && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
                  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
            vect = gimple_assign_rhs1 (def_stmt);
        }

      if (TREE_CODE (vect) == VECTOR_CST)
        return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
               && (CONSTRUCTOR_NELTS (vect) == 0
                   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
                      != VECTOR_TYPE))
        {
          if (index < CONSTRUCTOR_NELTS (vect))
            return CONSTRUCTOR_ELT (vect, index)->value;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
          tree size = TYPE_SIZE (vect_elt_type);
          tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
                                  size);
          return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */
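/* For illustration, with four-element vectors
   v0 = {a, b, c, d}, v1 = {e, f, g, h} and mask = {0, 4, 1, 5},
   the lowered constructor selects {v0[0], v1[0], v0[1], v1[1]},
   i.e. {a, e, b, f}: mask values with the ELEMENTS bit set pick from
   the second operand, using the low bits as the index.  */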
static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
          && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
        mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
        sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
                      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
        {
          gimple_assign_set_rhs3 (stmt, mask);
          update_stmt (stmt);
          return;
        }
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
          unsigned HOST_WIDE_INT index;

          index = TREE_INT_CST_LOW (i_val);
          if (!tree_fits_uhwi_p (i_val) || index >= elements)
            i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
          else
            t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
        }
      else
        {
          tree cond = NULL_TREE, v0_val;

          if (two_operand_p)
            {
              cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                                  build_int_cst (mask_elt_type, elements));
              cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                               true, GSI_SAME_STMT);
            }

          i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                               build_int_cst (mask_elt_type, elements - 1));
          i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
                                            true, GSI_SAME_STMT);

          v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
          v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

          if (two_operand_p)
            {
              tree v1_val;

              v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
              v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
                                                 true, GSI_SAME_STMT);

              cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                  cond, build_zero_cst (mask_elt_type));
              cond = fold_build3 (COND_EXPR, vect_elt_type,
                                  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                            true, GSI_SAME_STMT);
            }
          else
            t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
/* Return type in which CODE operation with optab OP can be
   computed.  */
static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
              < TYPE_VECTOR_SUBPARTS (compute_type))
          && (optab_handler (op, TYPE_MODE (vector_compute_type))
              != CODE_FOR_nothing))
        compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      enum machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
        {
          if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
            return compute_type;
          if (code == MULT_HIGHPART_EXPR
              && can_mult_highpart_p (compute_mode,
                                      TYPE_UNSIGNED (compute_type)))
            return compute_type;
        }
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}
/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */
static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}
/* Process one statement.  If we identify a vector operation, expand it.  */
static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
               && (first = uniform_vector_p (rhs2)) != NULL_TREE)
              || (TREE_CODE (rhs2) == SSA_NAME
                  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
                  && gimple_assign_single_p (def_stmt)
                  && (first = uniform_vector_p
                      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        op = opv;
      else
        {
          op = optab_for_tree_code (code, type, optab_scalar);

          compute_type = get_compute_type (code, op, type);
          if (compute_type == type)
            return;
          /* The rtl expander will expand vector/scalar as vector/vector
             if necessary.  Pick one with wider vector type.  */
          tree compute_vtype = get_compute_type (code, opv, type);
          if (count_type_subparts (compute_vtype)
              > count_type_subparts (compute_type))
            {
              compute_type = compute_vtype;
              op = opv;
            }
        }

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
        {
          if (compute_type == NULL_TREE)
            compute_type = get_compute_type (code, op, type);
          if (compute_type == type)
            return;
          /* Before splitting vector rotates into scalar rotates,
             see if we can't use vector shifts and BIT_IOR_EXPR
             instead.  For vector by vector rotates we'd also
             need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
             for now, fold doesn't seem to create such rotates anyway.  */
          if (compute_type == TREE_TYPE (type)
              && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
            {
              optab oplv = vashl_optab, opl = ashl_optab;
              optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
              tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
              tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
              tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
              tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
              tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
              /* The rtl expander will expand vector/scalar as vector/vector
                 if necessary.  Pick one with wider vector type.  */
              if (count_type_subparts (compute_lvtype)
                  > count_type_subparts (compute_ltype))
                {
                  compute_ltype = compute_lvtype;
                  opl = oplv;
                }
              if (count_type_subparts (compute_rvtype)
                  > count_type_subparts (compute_rtype))
                {
                  compute_rtype = compute_rvtype;
                  opr = oprv;
                }
              /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
                 BIT_IOR_EXPR.  */
              compute_type = compute_ltype;
              if (count_type_subparts (compute_type)
                  > count_type_subparts (compute_rtype))
                compute_type = compute_rtype;
              if (count_type_subparts (compute_type)
                  > count_type_subparts (compute_otype))
                compute_type = compute_otype;
              /* Verify all 3 operations can be performed in that type.  */
              if (compute_type != TREE_TYPE (type))
                {
                  if (optab_handler (opl, TYPE_MODE (compute_type))
                      == CODE_FOR_nothing
                      || optab_handler (opr, TYPE_MODE (compute_type))
                         == CODE_FOR_nothing
                      || optab_handler (opo, TYPE_MODE (compute_type))
                         == CODE_FOR_nothing)
                    compute_type = TREE_TYPE (type);
                }
            }
        }
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */
static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          expand_vector_operations_1 (&gsi);
          /* ???  If we do not clean up EH then we will ICE in
             verification.  But in reality we have created wrong-code
             as we did not properly transition EH info and edges to
             the piecewise computations.  */
          if (maybe_clean_eh_stmt (gsi_stmt (gsi))
              && gimple_purge_dead_eh_edges (bb))
            cfg_changed = true;
        }
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector
gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}
const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa
gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"