1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
33 #include "diagnostic.h"
36 /* Need to include rtl.h, expr.h, etc. for optabs. */
41 static void expand_vector_operations_1 (gimple_stmt_iterator
*);
44 /* Build a constant of type TYPE, made of VALUE's bits replicated
45 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
47 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
49 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
50 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
51 unsigned HOST_WIDE_INT low
, high
, mask
;
56 if (width
== HOST_BITS_PER_WIDE_INT
)
60 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
61 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
64 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
65 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
66 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
68 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_DOUBLE_INT
)
73 ret
= build_int_cst_wide (type
, low
, high
);
/* Cache consulted and updated by build_word_mode_vector_type:
   VECTOR_INNER_TYPE is the word_mode element type (looked up once, the
   first time it is found to be null), VECTOR_LAST_TYPE is the vector type
   most recently built from it, and VECTOR_LAST_NUNITS is the element
   count that type was built for.  */
77 static GTY(()) tree vector_inner_type
;
78 static GTY(()) tree vector_last_type
;
79 static GTY(()) int vector_last_nunits
;
78 /* Return a suitable vector type made of NUNITS units each of mode
79 "word_mode" (the global variable). */
84 build_word_mode_vector_type (int nunits
)
86 if (!vector_inner_type
)
87 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
88 else if (vector_last_nunits
== nunits
)
90 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
91 return vector_last_type
;
94 /* We build a new type, but we canonicalize it nevertheless,
95 because it still saves some memory. */
96 vector_last_nunits
= nunits
;
97 vector_last_type
= type_hash_canon (nunits
,
98 build_vector_type (vector_inner_type
,
100 return vector_last_type
;
/* Signature shared by the per-piece callbacks (do_unop, do_binop,
   do_compare, do_plus_minus, do_negate).  As invoked from
   expand_vector_piecewise the arguments are, in order: the statement
   iterator, the type to compute in, operand A, operand B, the bit
   position of the piece, the bit size of the piece, and the tree code.
   expand_vector_parallel also calls it with NULL_TREE for both the
   position and the size when the whole vector is handled at once.  The
   callback returns the tree holding the result for that piece.  */
103 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
104 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
/* Build a value of TYPE out of T.  Two forms are produced below: a
   BIT_FIELD_REF selecting BITSIZE bits of T starting at BITPOS, or a
   VIEW_CONVERT_EXPR reinterpreting all of T as TYPE.  Note the argument
   order here is (BITSIZE, BITPOS), the reverse of the elem_op_func
   callbacks.
   NOTE(review): the test that selects between the two returns is not
   visible in this listing.  Callers in this file pass NULL_TREE for the
   position when working on the whole vector, so it is presumably a test
   on BITPOS -- confirm against the upstream file.  */
107 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
108 tree t
, tree bitsize
, tree bitpos
)
111 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
113 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
/* elem_op_func callback for unary operations: extract the piece of A
   described by BITPOS/BITSIZE as a value of INNER_TYPE, then build
   CODE applied to that piece in INNER_TYPE.  B is ignored.
   NOTE(review): the tail of the parameter list (the declaration of
   CODE) is missing from this listing.  */
117 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
118 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
/* tree_vec_extract takes the size before the position.  */
121 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
122 return gimplify_build1 (gsi
, code
, inner_type
, a
);
/* elem_op_func callback for binary operations: build A CODE B in
   INNER_TYPE for the piece described by BITPOS/BITSIZE.  Each operand is
   replaced by its extracted piece only when its type is a VECTOR_TYPE;
   an operand of any other type is used unchanged.  */
126 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
127 tree bitpos
, tree bitsize
, enum tree_code code
)
/* Only vector operands are narrowed to the current piece.  */
129 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
130 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
131 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
132 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
133 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
136 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
138 INNER_TYPE is the type of A and B elements
140 returned expression is of signed integer type with the
141 size equal to the size of INNER_TYPE. */
143 do_compare (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
144 tree bitpos
, tree bitsize
, enum tree_code code
)
148 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
149 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
151 comp_type
= build_nonstandard_integer_type
152 (GET_MODE_BITSIZE (TYPE_MODE (inner_type
)), 0);
154 return gimplify_build3 (gsi
, COND_EXPR
, comp_type
,
155 fold_build2 (code
, boolean_type_node
, a
, b
),
156 build_int_cst (comp_type
, -1),
157 build_int_cst (comp_type
, 0));
160 /* Expand vector addition to scalars. This does bit twiddling
161 in order to increase parallelism:
163 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
164 (a ^ b) & 0x80808080
166 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
167 (a ^ ~b) & 0x80808080
169 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
171 This optimization should be done only if 4 vector items or more
172 fit into a word. */
174 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
175 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
178 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
179 unsigned HOST_WIDE_INT max
;
180 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
182 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
183 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
184 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
186 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
187 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
189 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
190 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
191 if (code
== PLUS_EXPR
)
192 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
195 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
196 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
199 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
200 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
201 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
/* elem_op_func callback that negates all the elements packed in one
   WORD_TYPE-sized piece of B with word-wide operations, computing

     -b = (HIGH - (b & LOW)) ^ (~b & HIGH)

   where LOW and HIGH are WORD_TYPE constants made by replicating, for
   each element of B's element type, the element mask shifted right by
   one (LOW) and the remaining top bit of the mask (HIGH).  The second
   operand and CODE are not used; MINUS_EXPR is hard-coded.
   NOTE(review): BITPOS and BITSIZE are marked ATTRIBUTE_UNUSED but are
   forwarded to tree_vec_extract below.  The declaration of MAX is
   missing from this listing.  */
205 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
206 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
207 tree bitsize ATTRIBUTE_UNUSED
,
208 enum tree_code code ATTRIBUTE_UNUSED
)
210 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
212 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
/* MAX is the mask of one element's mode; split it into the bits below
   the top bit and the top bit itself, replicated across the word.  */
214 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
215 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
216 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
/* Take the current piece of B as a WORD_TYPE value.  */
218 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
/* b_low = b & LOW; signs = ~b & HIGH.  */
220 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
221 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
222 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
/* result = (HIGH - b_low) ^ signs.  */
223 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
224 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
227 /* Expand a vector operation to scalars, by using many operations
228 whose type is the vector type's inner type. */
230 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
231 tree type
, tree inner_type
,
232 tree a
, tree b
, enum tree_code code
)
234 vec
<constructor_elt
, va_gc
> *v
;
235 tree part_width
= TYPE_SIZE (inner_type
);
236 tree index
= bitsize_int (0);
237 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
238 int delta
= tree_low_cst (part_width
, 1)
239 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
241 location_t loc
= gimple_location (gsi_stmt (*gsi
));
243 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi
)), type
))
244 warning_at (loc
, OPT_Wvector_operation_performance
,
245 "vector operation will be expanded piecewise");
247 warning_at (loc
, OPT_Wvector_operation_performance
,
248 "vector operation will be expanded in parallel");
250 vec_alloc (v
, (nunits
+ delta
- 1) / delta
);
251 for (i
= 0; i
< nunits
;
252 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
))
254 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
);
255 constructor_elt ce
= {NULL_TREE
, result
};
259 return build_constructor (type
, v
);
262 /* Expand a vector operation to scalars with the freedom to use
263 a scalar integer type, or to use a different size for the items
264 in the vector type. */
266 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
270 tree result
, compute_type
;
271 enum machine_mode mode
;
272 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
273 location_t loc
= gimple_location (gsi_stmt (*gsi
));
275 /* We have three strategies. If the type is already correct, just do
276 the operation an element at a time. Else, if the vector is wider than
277 one word, do it a word at a time; finally, if the vector is smaller
278 than one word, do it as a scalar. */
279 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
280 return expand_vector_piecewise (gsi
, f
,
281 type
, TREE_TYPE (type
),
283 else if (n_words
> 1)
285 tree word_type
= build_word_mode_vector_type (n_words
);
286 result
= expand_vector_piecewise (gsi
, f
,
287 word_type
, TREE_TYPE (word_type
),
289 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
294 /* Use a single scalar operation with a mode no wider than word_mode. */
295 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
296 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
297 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
298 warning_at (loc
, OPT_Wvector_operation_performance
,
299 "vector operation will be expanded with a "
300 "single scalar operation");
306 /* Expand a vector operation to scalars; for integer types we can use
307 special bit twiddling tricks to do the sums a word at a time, using
308 function F_PARALLEL instead of F. These tricks are done only if
309 they can process at least four items, that is, only if the vector
310 holds at least four items and if a word can hold four items. */
312 expand_vector_addition (gimple_stmt_iterator
*gsi
,
313 elem_op_func f
, elem_op_func f_parallel
,
314 tree type
, tree a
, tree b
, enum tree_code code
)
316 int parts_per_word
= UNITS_PER_WORD
317 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
319 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
320 && parts_per_word
>= 4
321 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
322 return expand_vector_parallel (gsi
, f_parallel
,
325 return expand_vector_piecewise (gsi
, f
,
326 type
, TREE_TYPE (type
),
330 /* Check if vector VEC consists of all the equal elements and
331 that the number of elements corresponds to the type of VEC.
332 The function returns first element of the vector
333 or NULL_TREE if the vector is not uniform. */
335 uniform_vector_p (tree vec
)
340 if (vec
== NULL_TREE
)
343 if (TREE_CODE (vec
) == VECTOR_CST
)
345 first
= VECTOR_CST_ELT (vec
, 0);
346 for (i
= 1; i
< VECTOR_CST_NELTS (vec
); ++i
)
347 if (!operand_equal_p (first
, VECTOR_CST_ELT (vec
, i
), 0))
353 else if (TREE_CODE (vec
) == CONSTRUCTOR
)
355 first
= error_mark_node
;
357 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec
), i
, t
)
364 if (!operand_equal_p (first
, t
, 0))
367 if (i
!= TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec
)))
376 /* Try to expand vector comparison expression OP0 CODE OP1 by
377 querying optab if the following expression:
378 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
379 can be expanded. */
381 expand_vector_comparison (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
382 tree op1
, enum tree_code code
)
385 if (! expand_vec_cond_expr_p (type
, TREE_TYPE (op0
)))
386 t
= expand_vector_piecewise (gsi
, do_compare
, type
,
387 TREE_TYPE (TREE_TYPE (op0
)), op0
, op1
, code
);
394 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
395 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
396 the result if successful, otherwise return NULL_TREE. */
398 add_rshift (gimple_stmt_iterator
*gsi
, tree type
, tree op0
, int *shiftcnts
)
401 unsigned int i
, nunits
= TYPE_VECTOR_SUBPARTS (type
);
402 bool scalar_shift
= true;
404 for (i
= 1; i
< nunits
; i
++)
406 if (shiftcnts
[i
] != shiftcnts
[0])
407 scalar_shift
= false;
410 if (scalar_shift
&& shiftcnts
[0] == 0)
415 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_scalar
);
416 if (op
!= unknown_optab
417 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
418 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
419 build_int_cst (NULL_TREE
, shiftcnts
[0]));
422 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
423 if (op
!= unknown_optab
424 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
426 tree
*vec
= XALLOCAVEC (tree
, nunits
);
427 for (i
= 0; i
< nunits
; i
++)
428 vec
[i
] = build_int_cst (TREE_TYPE (type
), shiftcnts
[i
]);
429 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
430 build_vector (type
, vec
));
436 /* Try to expand integer vector division by constant using
437 widening multiply, shifts and additions. */
439 expand_vector_divmod (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
440 tree op1
, enum tree_code code
)
442 bool use_pow2
= true;
443 bool has_vector_shift
= true;
444 int mode
= -1, this_mode
;
445 int pre_shift
= -1, post_shift
;
446 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (type
);
447 int *shifts
= XALLOCAVEC (int, nunits
* 4);
448 int *pre_shifts
= shifts
+ nunits
;
449 int *post_shifts
= pre_shifts
+ nunits
;
450 int *shift_temps
= post_shifts
+ nunits
;
451 unsigned HOST_WIDE_INT
*mulc
= XALLOCAVEC (unsigned HOST_WIDE_INT
, nunits
);
452 int prec
= TYPE_PRECISION (TREE_TYPE (type
));
454 unsigned int i
, unsignedp
= TYPE_UNSIGNED (TREE_TYPE (type
));
455 unsigned HOST_WIDE_INT mask
= GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type
)));
457 tree cur_op
, mulcst
, tem
;
460 if (prec
> HOST_BITS_PER_WIDE_INT
)
463 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
464 if (op
== unknown_optab
465 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
466 has_vector_shift
= false;
468 /* Analysis phase. Determine if all op1 elements are either power
469 of two and it is possible to expand it using shifts (or for remainder
470 using masking). Additionally compute the multiplicative constants
471 and pre and post shifts if the division is to be expanded using
472 widening or high part multiplication plus shifts. */
473 for (i
= 0; i
< nunits
; i
++)
475 tree cst
= VECTOR_CST_ELT (op1
, i
);
476 unsigned HOST_WIDE_INT ml
;
478 if (!host_integerp (cst
, unsignedp
) || integer_zerop (cst
))
484 && (!integer_pow2p (cst
) || tree_int_cst_sgn (cst
) != 1))
488 shifts
[i
] = tree_log2 (cst
);
489 if (shifts
[i
] != shifts
[0]
490 && code
== TRUNC_DIV_EXPR
491 && !has_vector_shift
)
498 unsigned HOST_WIDE_INT mh
;
499 unsigned HOST_WIDE_INT d
= tree_low_cst (cst
, 1) & mask
;
501 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
502 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
511 /* Find a suitable multiplier and right shift count
512 instead of multiplying with D. */
513 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
515 /* If the suggested multiplier is more than SIZE bits, we can
516 do better for even divisors, using an initial right shift. */
517 if ((mh
!= 0 && (d
& 1) == 0)
518 || (!has_vector_shift
&& pre_shift
!= -1))
520 if (has_vector_shift
)
521 pre_shift
= floor_log2 (d
& -d
);
522 else if (pre_shift
== -1)
525 for (j
= 0; j
< nunits
; j
++)
527 tree cst2
= VECTOR_CST_ELT (op1
, j
);
528 unsigned HOST_WIDE_INT d2
;
531 if (!host_integerp (cst2
, 1))
533 d2
= tree_low_cst (cst2
, 1) & mask
;
536 this_pre_shift
= floor_log2 (d2
& -d2
);
537 if (pre_shift
== -1 || this_pre_shift
< pre_shift
)
538 pre_shift
= this_pre_shift
;
540 if (i
!= 0 && pre_shift
!= 0)
550 if ((d
>> pre_shift
) <= 1)
555 mh
= choose_multiplier (d
>> pre_shift
, prec
,
557 &ml
, &post_shift
, &dummy_int
);
559 pre_shifts
[i
] = pre_shift
;
569 HOST_WIDE_INT d
= tree_low_cst (cst
, 0);
570 unsigned HOST_WIDE_INT abs_d
;
575 /* Since d might be INT_MIN, we have to cast to
576 unsigned HOST_WIDE_INT before negating to avoid
577 undefined signed overflow. */
579 ? (unsigned HOST_WIDE_INT
) d
580 : - (unsigned HOST_WIDE_INT
) d
);
582 /* n rem d = n rem -d */
583 if (code
== TRUNC_MOD_EXPR
&& d
< 0)
585 else if (abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
587 /* This case is not handled correctly below. */
597 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
,
598 &post_shift
, &dummy_int
);
599 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
601 this_mode
= 4 + (d
< 0);
602 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
605 this_mode
= 2 + (d
< 0);
608 post_shifts
[i
] = post_shift
;
609 if ((i
&& !has_vector_shift
&& post_shifts
[0] != post_shift
)
610 || post_shift
>= prec
611 || pre_shifts
[i
] >= prec
)
616 else if (mode
!= this_mode
)
620 vec
= XALLOCAVEC (tree
, nunits
);
624 tree addend
= NULL_TREE
;
629 /* Both division and remainder sequences need
630 op0 < 0 ? mask : 0 computed. It can be either computed as
631 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
632 if none of the shifts is 0, or as the conditional. */
633 for (i
= 0; i
< nunits
; i
++)
637 = build_vector_type (build_nonstandard_integer_type (prec
, 1),
639 if (i
== nunits
&& TYPE_MODE (uns_type
) == TYPE_MODE (type
))
641 for (i
= 0; i
< nunits
; i
++)
642 shift_temps
[i
] = prec
- 1;
643 cur_op
= add_rshift (gsi
, type
, op0
, shift_temps
);
644 if (cur_op
!= NULL_TREE
)
646 cur_op
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
648 for (i
= 0; i
< nunits
; i
++)
649 shift_temps
[i
] = prec
- shifts
[i
];
650 cur_op
= add_rshift (gsi
, uns_type
, cur_op
, shift_temps
);
651 if (cur_op
!= NULL_TREE
)
652 addend
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
656 if (addend
== NULL_TREE
657 && expand_vec_cond_expr_p (type
, type
))
659 tree zero
, cst
, cond
;
662 zero
= build_zero_cst (type
);
663 cond
= build2 (LT_EXPR
, type
, op0
, zero
);
664 for (i
= 0; i
< nunits
; i
++)
665 vec
[i
] = build_int_cst (TREE_TYPE (type
),
666 ((unsigned HOST_WIDE_INT
) 1
668 cst
= build_vector (type
, vec
);
669 addend
= make_ssa_name (type
, NULL
);
670 stmt
= gimple_build_assign_with_ops (VEC_COND_EXPR
, addend
,
672 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
675 if (code
== TRUNC_DIV_EXPR
)
679 /* q = op0 >> shift; */
680 cur_op
= add_rshift (gsi
, type
, op0
, shifts
);
681 if (cur_op
!= NULL_TREE
)
684 else if (addend
!= NULL_TREE
)
686 /* t1 = op0 + addend;
687 q = t1 >> shift; */
688 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
689 if (op
!= unknown_optab
690 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
692 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
, addend
);
693 cur_op
= add_rshift (gsi
, type
, cur_op
, shifts
);
694 if (cur_op
!= NULL_TREE
)
702 for (i
= 0; i
< nunits
; i
++)
703 vec
[i
] = build_int_cst (TREE_TYPE (type
),
704 ((unsigned HOST_WIDE_INT
) 1
706 mask
= build_vector (type
, vec
);
707 op
= optab_for_tree_code (BIT_AND_EXPR
, type
, optab_default
);
708 if (op
!= unknown_optab
709 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
712 /* r = op0 & mask; */
713 return gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, op0
, mask
);
714 else if (addend
!= NULL_TREE
)
716 /* t1 = op0 + addend;
717 t2 = t1 & mask;
718 r = t2 - addend; */
719 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
720 if (op
!= unknown_optab
721 && optab_handler (op
, TYPE_MODE (type
))
724 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
,
726 cur_op
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
,
728 op
= optab_for_tree_code (MINUS_EXPR
, type
,
730 if (op
!= unknown_optab
731 && optab_handler (op
, TYPE_MODE (type
))
733 return gimplify_build2 (gsi
, MINUS_EXPR
, type
,
741 if (mode
== -2 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
744 if (!can_mult_highpart_p (TYPE_MODE (type
), TYPE_UNSIGNED (type
)))
752 gcc_assert (unsignedp
);
753 /* t1 = oprnd0 >> pre_shift;
754 t2 = t1 h* ml;
755 q = t2 >> post_shift; */
756 cur_op
= add_rshift (gsi
, type
, cur_op
, pre_shifts
);
757 if (cur_op
== NULL_TREE
)
761 gcc_assert (unsignedp
);
762 for (i
= 0; i
< nunits
; i
++)
772 gcc_assert (!unsignedp
);
773 for (i
= 0; i
< nunits
; i
++)
774 shift_temps
[i
] = prec
- 1;
780 for (i
= 0; i
< nunits
; i
++)
781 vec
[i
] = build_int_cst (TREE_TYPE (type
), mulc
[i
]);
782 mulcst
= build_vector (type
, vec
);
784 cur_op
= gimplify_build2 (gsi
, MULT_HIGHPART_EXPR
, type
, cur_op
, mulcst
);
789 /* t1 = oprnd0 >> pre_shift;
790 t2 = t1 h* ml;
791 q = t2 >> post_shift; */
792 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
795 /* t1 = oprnd0 h* ml;
796 t2 = oprnd0 - t1;
797 t3 = t2 >> 1;
798 t4 = t1 + t3;
799 q = t4 >> (post_shift - 1); */
800 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
801 if (op
== unknown_optab
802 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
804 tem
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, cur_op
);
805 tem
= add_rshift (gsi
, type
, tem
, shift_temps
);
806 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
807 if (op
== unknown_optab
808 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
810 tem
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, tem
);
811 cur_op
= add_rshift (gsi
, type
, tem
, post_shifts
);
812 if (cur_op
== NULL_TREE
)
819 /* t1 = oprnd0 h* ml;
820 t2 = t1; [ iff (mode & 2) != 0 ]
821 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
822 t3 = t2 >> post_shift;
823 t4 = oprnd0 >> (prec - 1);
824 q = t3 - t4; [ iff (mode & 1) == 0 ]
825 q = t4 - t3; [ iff (mode & 1) != 0 ] */
828 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
829 if (op
== unknown_optab
830 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
832 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, op0
);
834 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
835 if (cur_op
== NULL_TREE
)
837 tem
= add_rshift (gsi
, type
, op0
, shift_temps
);
838 if (tem
== NULL_TREE
)
840 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
841 if (op
== unknown_optab
842 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
845 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, cur_op
, tem
);
847 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, tem
, cur_op
);
853 if (code
== TRUNC_DIV_EXPR
)
856 /* We divided. Now finish by:
857 t1 = q * oprnd1;
858 r = oprnd0 - t1; */
859 op
= optab_for_tree_code (MULT_EXPR
, type
, optab_default
);
860 if (op
== unknown_optab
861 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
863 tem
= gimplify_build2 (gsi
, MULT_EXPR
, type
, cur_op
, op1
);
864 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
865 if (op
== unknown_optab
866 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
868 return gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, tem
);
871 /* Expand a vector condition to scalars, by using many conditions
872 on the vector's elements. */
874 expand_vector_condition (gimple_stmt_iterator
*gsi
)
876 gimple stmt
= gsi_stmt (*gsi
);
877 tree type
= gimple_expr_type (stmt
);
878 tree a
= gimple_assign_rhs1 (stmt
);
881 bool a_is_comparison
= false;
882 tree b
= gimple_assign_rhs2 (stmt
);
883 tree c
= gimple_assign_rhs3 (stmt
);
884 vec
<constructor_elt
, va_gc
> *v
;
886 tree inner_type
= TREE_TYPE (type
);
887 tree cond_type
= TREE_TYPE (TREE_TYPE (a
));
888 tree comp_inner_type
= cond_type
;
889 tree width
= TYPE_SIZE (inner_type
);
890 tree index
= bitsize_int (0);
891 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
893 location_t loc
= gimple_location (gsi_stmt (*gsi
));
895 if (!is_gimple_val (a
))
897 gcc_assert (COMPARISON_CLASS_P (a
));
898 a_is_comparison
= true;
899 a1
= TREE_OPERAND (a
, 0);
900 a2
= TREE_OPERAND (a
, 1);
901 comp_inner_type
= TREE_TYPE (TREE_TYPE (a1
));
904 if (expand_vec_cond_expr_p (type
, TREE_TYPE (a1
)))
907 /* TODO: try and find a smaller vector type. */
909 warning_at (loc
, OPT_Wvector_operation_performance
,
910 "vector condition will be expanded piecewise");
912 vec_alloc (v
, nunits
);
913 for (i
= 0; i
< nunits
;
914 i
++, index
= int_const_binop (PLUS_EXPR
, index
, width
))
917 tree bb
= tree_vec_extract (gsi
, inner_type
, b
, width
, index
);
918 tree cc
= tree_vec_extract (gsi
, inner_type
, c
, width
, index
);
921 tree aa1
= tree_vec_extract (gsi
, comp_inner_type
, a1
, width
, index
);
922 tree aa2
= tree_vec_extract (gsi
, comp_inner_type
, a2
, width
, index
);
923 aa
= build2 (TREE_CODE (a
), cond_type
, aa1
, aa2
);
926 aa
= tree_vec_extract (gsi
, cond_type
, a
, width
, index
);
927 result
= gimplify_build3 (gsi
, COND_EXPR
, inner_type
, aa
, bb
, cc
);
928 constructor_elt ce
= {NULL_TREE
, result
};
932 constr
= build_constructor (type
, v
);
933 gimple_assign_set_rhs_from_tree (gsi
, constr
);
934 update_stmt (gsi_stmt (*gsi
));
938 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
939 gimple assign
, enum tree_code code
)
941 enum machine_mode compute_mode
= TYPE_MODE (compute_type
);
943 /* If the compute mode is not a vector mode (hence we are not decomposing
944 a BLKmode vector to smaller, hardware-supported vectors), we may want
945 to expand the operations in parallel. */
946 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
947 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
948 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
949 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
950 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
951 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
956 if (!TYPE_OVERFLOW_TRAPS (type
))
957 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
958 gimple_assign_rhs1 (assign
),
959 gimple_assign_rhs2 (assign
), code
);
963 if (!TYPE_OVERFLOW_TRAPS (type
))
964 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
965 gimple_assign_rhs1 (assign
),
972 return expand_vector_parallel (gsi
, do_binop
, type
,
973 gimple_assign_rhs1 (assign
),
974 gimple_assign_rhs2 (assign
), code
);
977 return expand_vector_parallel (gsi
, do_unop
, type
,
978 gimple_assign_rhs1 (assign
),
995 tree rhs1
= gimple_assign_rhs1 (assign
);
996 tree rhs2
= gimple_assign_rhs2 (assign
);
998 return expand_vector_comparison (gsi
, type
, rhs1
, rhs2
, code
);
1001 case TRUNC_DIV_EXPR
:
1002 case TRUNC_MOD_EXPR
:
1004 tree rhs1
= gimple_assign_rhs1 (assign
);
1005 tree rhs2
= gimple_assign_rhs2 (assign
);
1009 || !VECTOR_INTEGER_TYPE_P (type
)
1010 || TREE_CODE (rhs2
) != VECTOR_CST
)
1013 ret
= expand_vector_divmod (gsi
, type
, rhs1
, rhs2
, code
);
1014 if (ret
!= NULL_TREE
)
1023 if (TREE_CODE_CLASS (code
) == tcc_unary
)
1024 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
1025 gimple_assign_rhs1 (assign
),
1028 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
1029 gimple_assign_rhs1 (assign
),
1030 gimple_assign_rhs2 (assign
), code
);
1033 /* Return a type for the widest vector mode whose components are of type
1034 TYPE, or NULL_TREE if none is found. */
1037 type_for_widest_vector_mode (tree type
, optab op
)
1039 enum machine_mode inner_mode
= TYPE_MODE (type
);
1040 enum machine_mode best_mode
= VOIDmode
, mode
;
1041 int best_nunits
= 0;
1043 if (SCALAR_FLOAT_MODE_P (inner_mode
))
1044 mode
= MIN_MODE_VECTOR_FLOAT
;
1045 else if (SCALAR_FRACT_MODE_P (inner_mode
))
1046 mode
= MIN_MODE_VECTOR_FRACT
;
1047 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
1048 mode
= MIN_MODE_VECTOR_UFRACT
;
1049 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
1050 mode
= MIN_MODE_VECTOR_ACCUM
;
1051 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
1052 mode
= MIN_MODE_VECTOR_UACCUM
;
1054 mode
= MIN_MODE_VECTOR_INT
;
1056 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
1057 if (GET_MODE_INNER (mode
) == inner_mode
1058 && GET_MODE_NUNITS (mode
) > best_nunits
1059 && optab_handler (op
, mode
) != CODE_FOR_nothing
)
1060 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
1062 if (best_mode
== VOIDmode
)
1065 return build_vector_type_for_mode (type
, best_mode
);
1069 /* Build a reference to the element of the vector VECT. Function
1070 returns either the element itself, a BIT_FIELD_REF, or an
1071 ARRAY_REF expression.
1073 GSI is required to insert temporary variables while building a
1074 reference to the element of the vector VECT.
1076 PTMPVEC is a pointer to the temporary variable for caching
1077 purposes. In case when PTMPVEC is NULL new temporary variable
1078 will be created. */
1080 vector_element (gimple_stmt_iterator
*gsi
, tree vect
, tree idx
, tree
*ptmpvec
)
1082 tree vect_type
, vect_elt_type
;
1086 bool need_asgn
= true;
1087 unsigned int elements
;
1089 vect_type
= TREE_TYPE (vect
);
1090 vect_elt_type
= TREE_TYPE (vect_type
);
1091 elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1093 if (TREE_CODE (idx
) == INTEGER_CST
)
1095 unsigned HOST_WIDE_INT index
;
1097 /* Given that we're about to compute a binary modulus,
1098 we don't care about the high bits of the value. */
1099 index
= TREE_INT_CST_LOW (idx
);
1100 if (!host_integerp (idx
, 1) || index
>= elements
)
1102 index
&= elements
- 1;
1103 idx
= build_int_cst (TREE_TYPE (idx
), index
);
1106 /* When lowering a vector statement sequence do some easy
1107 simplification by looking through intermediate vector results. */
1108 if (TREE_CODE (vect
) == SSA_NAME
)
1110 gimple def_stmt
= SSA_NAME_DEF_STMT (vect
);
1111 if (is_gimple_assign (def_stmt
)
1112 && (gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
1113 || gimple_assign_rhs_code (def_stmt
) == CONSTRUCTOR
))
1114 vect
= gimple_assign_rhs1 (def_stmt
);
1117 if (TREE_CODE (vect
) == VECTOR_CST
)
1118 return VECTOR_CST_ELT (vect
, index
);
1119 else if (TREE_CODE (vect
) == CONSTRUCTOR
1120 && (CONSTRUCTOR_NELTS (vect
) == 0
1121 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect
, 0)->value
))
1124 if (index
< CONSTRUCTOR_NELTS (vect
))
1125 return CONSTRUCTOR_ELT (vect
, index
)->value
;
1126 return build_zero_cst (vect_elt_type
);
1130 tree size
= TYPE_SIZE (vect_elt_type
);
1131 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (index
),
1133 return fold_build3 (BIT_FIELD_REF
, vect_elt_type
, vect
, size
, pos
);
1138 tmpvec
= create_tmp_var (vect_type
, "vectmp");
1140 tmpvec
= *ptmpvec
= create_tmp_var (vect_type
, "vectmp");
1149 TREE_ADDRESSABLE (tmpvec
) = 1;
1150 asgn
= gimple_build_assign (tmpvec
, vect
);
1151 gsi_insert_before (gsi
, asgn
, GSI_SAME_STMT
);
1154 arraytype
= build_array_type_nelts (vect_elt_type
, elements
);
1155 return build4 (ARRAY_REF
, vect_elt_type
,
1156 build1 (VIEW_CONVERT_EXPR
, arraytype
, tmpvec
),
1157 idx
, NULL_TREE
, NULL_TREE
);
1160 /* Check if VEC_PERM_EXPR within the given setting is supported
1161 by hardware, or lower it piecewise.
1163 When VEC_PERM_EXPR has the same first and second operands:
1164 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1165 {v0[mask[0]], v0[mask[1]], ...}
1166 MASK and V0 must have the same number of elements.
1168 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1169 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1170 V0 and V1 must have the same type. MASK, V0, V1 must have the
1171 same number of arguments. */
1174 lower_vec_perm (gimple_stmt_iterator
*gsi
)
1176 gimple stmt
= gsi_stmt (*gsi
);
1177 tree mask
= gimple_assign_rhs3 (stmt
);
1178 tree vec0
= gimple_assign_rhs1 (stmt
);
1179 tree vec1
= gimple_assign_rhs2 (stmt
);
1180 tree vect_type
= TREE_TYPE (vec0
);
1181 tree mask_type
= TREE_TYPE (mask
);
1182 tree vect_elt_type
= TREE_TYPE (vect_type
);
1183 tree mask_elt_type
= TREE_TYPE (mask_type
);
1184 unsigned int elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1185 vec
<constructor_elt
, va_gc
> *v
;
1186 tree constr
, t
, si
, i_val
;
1187 tree vec0tmp
= NULL_TREE
, vec1tmp
= NULL_TREE
, masktmp
= NULL_TREE
;
1188 bool two_operand_p
= !operand_equal_p (vec0
, vec1
, 0);
1189 location_t loc
= gimple_location (gsi_stmt (*gsi
));
1192 if (TREE_CODE (mask
) == SSA_NAME
)
1194 gimple def_stmt
= SSA_NAME_DEF_STMT (mask
);
1195 if (is_gimple_assign (def_stmt
)
1196 && gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
)
1197 mask
= gimple_assign_rhs1 (def_stmt
);
1200 if (TREE_CODE (mask
) == VECTOR_CST
)
1202 unsigned char *sel_int
= XALLOCAVEC (unsigned char, elements
);
1204 for (i
= 0; i
< elements
; ++i
)
1205 sel_int
[i
] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask
, i
))
1206 & (2 * elements
- 1));
1208 if (can_vec_perm_p (TYPE_MODE (vect_type
), false, sel_int
))
1210 gimple_assign_set_rhs3 (stmt
, mask
);
1215 else if (can_vec_perm_p (TYPE_MODE (vect_type
), true, NULL
))
1218 warning_at (loc
, OPT_Wvector_operation_performance
,
1219 "vector shuffling operation will be expanded piecewise");
1221 vec_alloc (v
, elements
);
1222 for (i
= 0; i
< elements
; i
++)
1225 i_val
= vector_element (gsi
, mask
, si
, &masktmp
);
1227 if (TREE_CODE (i_val
) == INTEGER_CST
)
1229 unsigned HOST_WIDE_INT index
;
1231 index
= TREE_INT_CST_LOW (i_val
);
1232 if (!host_integerp (i_val
, 1) || index
>= elements
)
1233 i_val
= build_int_cst (mask_elt_type
, index
& (elements
- 1));
1235 if (two_operand_p
&& (index
& elements
) != 0)
1236 t
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1238 t
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1240 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
1241 true, GSI_SAME_STMT
);
1245 tree cond
= NULL_TREE
, v0_val
;
1249 cond
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1250 build_int_cst (mask_elt_type
, elements
));
1251 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1252 true, GSI_SAME_STMT
);
1255 i_val
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1256 build_int_cst (mask_elt_type
, elements
- 1));
1257 i_val
= force_gimple_operand_gsi (gsi
, i_val
, true, NULL_TREE
,
1258 true, GSI_SAME_STMT
);
1260 v0_val
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1261 v0_val
= force_gimple_operand_gsi (gsi
, v0_val
, true, NULL_TREE
,
1262 true, GSI_SAME_STMT
);
1268 v1_val
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1269 v1_val
= force_gimple_operand_gsi (gsi
, v1_val
, true, NULL_TREE
,
1270 true, GSI_SAME_STMT
);
1272 cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1273 cond
, build_zero_cst (mask_elt_type
));
1274 cond
= fold_build3 (COND_EXPR
, vect_elt_type
,
1275 cond
, v0_val
, v1_val
);
1276 t
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1277 true, GSI_SAME_STMT
);
1283 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, t
);
1286 constr
= build_constructor (vect_type
, v
);
1287 gimple_assign_set_rhs_from_tree (gsi
, constr
);
1288 update_stmt (gsi_stmt (*gsi
));
1291 /* Process one statement. If we identify a vector operation, expand it. */
1294 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
1296 gimple stmt
= gsi_stmt (*gsi
);
1297 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
;
1298 enum tree_code code
;
1299 enum machine_mode compute_mode
;
1300 optab op
= unknown_optab
;
1301 enum gimple_rhs_class rhs_class
;
1304 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1307 code
= gimple_assign_rhs_code (stmt
);
1308 rhs_class
= get_gimple_rhs_class (code
);
1309 lhs
= gimple_assign_lhs (stmt
);
1311 if (code
== VEC_PERM_EXPR
)
1313 lower_vec_perm (gsi
);
1317 if (code
== VEC_COND_EXPR
)
1319 expand_vector_condition (gsi
);
1322 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
1325 rhs1
= gimple_assign_rhs1 (stmt
);
1326 type
= gimple_expr_type (stmt
);
1327 if (rhs_class
== GIMPLE_BINARY_RHS
)
1328 rhs2
= gimple_assign_rhs2 (stmt
);
1330 if (TREE_CODE (type
) != VECTOR_TYPE
)
1333 if (code
== NOP_EXPR
1334 || code
== FLOAT_EXPR
1335 || code
== FIX_TRUNC_EXPR
1336 || code
== VIEW_CONVERT_EXPR
)
1339 gcc_assert (code
!= CONVERT_EXPR
);
1341 /* The signedness is determined from input argument. */
1342 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
1343 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
1344 type
= TREE_TYPE (rhs1
);
1346 /* For widening/narrowing vector operations, the relevant type is of the
1347 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1348 calculated in the same way above. */
1349 if (code
== WIDEN_SUM_EXPR
1350 || code
== VEC_WIDEN_MULT_HI_EXPR
1351 || code
== VEC_WIDEN_MULT_LO_EXPR
1352 || code
== VEC_WIDEN_MULT_EVEN_EXPR
1353 || code
== VEC_WIDEN_MULT_ODD_EXPR
1354 || code
== VEC_UNPACK_HI_EXPR
1355 || code
== VEC_UNPACK_LO_EXPR
1356 || code
== VEC_PACK_TRUNC_EXPR
1357 || code
== VEC_PACK_SAT_EXPR
1358 || code
== VEC_PACK_FIX_TRUNC_EXPR
1359 || code
== VEC_WIDEN_LSHIFT_HI_EXPR
1360 || code
== VEC_WIDEN_LSHIFT_LO_EXPR
)
1361 type
= TREE_TYPE (rhs1
);
1363 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1365 if (code
== LSHIFT_EXPR
1366 || code
== RSHIFT_EXPR
1367 || code
== LROTATE_EXPR
1368 || code
== RROTATE_EXPR
)
1372 /* Check whether we have vector <op> {x,x,x,x} where x
1373 could be a scalar variable or a constant. Transform
1374 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1375 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1380 if ((TREE_CODE (rhs2
) == VECTOR_CST
1381 && (first
= uniform_vector_p (rhs2
)) != NULL_TREE
)
1382 || (TREE_CODE (rhs2
) == SSA_NAME
1383 && (def_stmt
= SSA_NAME_DEF_STMT (rhs2
))
1384 && gimple_assign_single_p (def_stmt
)
1385 && (first
= uniform_vector_p
1386 (gimple_assign_rhs1 (def_stmt
))) != NULL_TREE
))
1388 gimple_assign_set_rhs2 (stmt
, first
);
1394 opv
= optab_for_tree_code (code
, type
, optab_vector
);
1395 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1399 op
= optab_for_tree_code (code
, type
, optab_scalar
);
1401 /* The rtl expander will expand vector/scalar as vector/vector
1402 if necessary. Don't bother converting the stmt here. */
1403 if (optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
1404 && optab_handler (opv
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
1409 op
= optab_for_tree_code (code
, type
, optab_default
);
1411 /* Optabs will try converting a negation into a subtraction, so
1412 look for it as well. TODO: negation of floating-point vectors
1413 might be turned into an exclusive OR toggling the sign bit. */
1414 if (op
== unknown_optab
1415 && code
== NEGATE_EXPR
1416 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
1417 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
1419 /* For very wide vectors, try using a smaller vector mode. */
1420 compute_type
= type
;
1421 if (!VECTOR_MODE_P (TYPE_MODE (type
)) && op
)
1423 tree vector_compute_type
1424 = type_for_widest_vector_mode (TREE_TYPE (type
), op
);
1425 if (vector_compute_type
!= NULL_TREE
1426 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
1427 < TYPE_VECTOR_SUBPARTS (compute_type
))
1428 && (optab_handler (op
, TYPE_MODE (vector_compute_type
))
1429 != CODE_FOR_nothing
))
1430 compute_type
= vector_compute_type
;
1433 /* If we are breaking a BLKmode vector into smaller pieces,
1434 type_for_widest_vector_mode has already looked into the optab,
1435 so skip these checks. */
1436 if (compute_type
== type
)
1438 compute_mode
= TYPE_MODE (compute_type
);
1439 if (VECTOR_MODE_P (compute_mode
))
1441 if (op
&& optab_handler (op
, compute_mode
) != CODE_FOR_nothing
)
1443 if (code
== MULT_HIGHPART_EXPR
1444 && can_mult_highpart_p (compute_mode
,
1445 TYPE_UNSIGNED (compute_type
)))
1448 /* There is no operation in hardware, so fall back to scalars. */
1449 compute_type
= TREE_TYPE (type
);
1452 gcc_assert (code
!= VEC_LSHIFT_EXPR
&& code
!= VEC_RSHIFT_EXPR
);
1453 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
1455 /* Leave expression untouched for later expansion. */
1456 if (new_rhs
== NULL_TREE
)
1459 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
1460 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
1463 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1464 way to do it is change expand_vector_operation and its callees to
1465 return a tree_code, RHS1 and RHS2 instead of a tree. */
1466 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
1467 update_stmt (gsi_stmt (*gsi
));
1470 /* Use this to lower vector operations introduced by the vectorizer,
1471 if it may need the bit-twiddling tricks implemented in this file. */
1474 gate_expand_vector_operations_ssa (void)
1476 return optimize
== 0;
1480 expand_vector_operations (void)
1482 gimple_stmt_iterator gsi
;
1484 bool cfg_changed
= false;
1488 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1490 expand_vector_operations_1 (&gsi
);
1491 /* ??? If we do not cleanup EH then we will ICE in
1492 verification. But in reality we have created wrong-code
1493 as we did not properly transition EH info and edges to
1494 the piecewise computations. */
1495 if (maybe_clean_eh_stmt (gsi_stmt (gsi
))
1496 && gimple_purge_dead_eh_edges (bb
))
1501 return cfg_changed
? TODO_cleanup_cfg
: 0;
1504 struct gimple_opt_pass pass_lower_vector
=
1508 "veclower", /* name */
1509 OPTGROUP_VEC
, /* optinfo_flags */
1510 gate_expand_vector_operations_ssa
, /* gate */
1511 expand_vector_operations
, /* execute */
1514 0, /* static_pass_number */
1515 TV_NONE
, /* tv_id */
1516 PROP_cfg
, /* properties_required */
1517 0, /* properties_provided */
1518 0, /* properties_destroyed */
1519 0, /* todo_flags_start */
1520 TODO_update_ssa
/* todo_flags_finish */
1522 | TODO_verify_stmts
| TODO_verify_flow
1527 struct gimple_opt_pass pass_lower_vector_ssa
=
1531 "veclower2", /* name */
1532 OPTGROUP_VEC
, /* optinfo_flags */
1534 expand_vector_operations
, /* execute */
1537 0, /* static_pass_number */
1538 TV_NONE
, /* tv_id */
1539 PROP_cfg
, /* properties_required */
1540 0, /* properties_provided */
1541 0, /* properties_destroyed */
1542 0, /* todo_flags_start */
1543 TODO_update_ssa
/* todo_flags_finish */
1545 | TODO_verify_stmts
| TODO_verify_flow
1550 #include "gt-tree-vect-generic.h"