1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
24 #include "stor-layout.h"
26 #include "langhooks.h"
27 #include "basic-block.h"
28 #include "tree-ssa-alias.h"
29 #include "internal-fn.h"
31 #include "gimple-expr.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "gimple-ssa.h"
38 #include "stringpool.h"
39 #include "tree-ssanames.h"
40 #include "tree-iterator.h"
41 #include "tree-pass.h"
43 #include "diagnostic.h"
46 /* Need to include rtl.h, expr.h, etc. for optabs. */
51 static void expand_vector_operations_1 (gimple_stmt_iterator
*);
54 /* Build a constant of type TYPE, made of VALUE's bits replicated
55 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
57 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
59 int width
= tree_to_uhwi (TYPE_SIZE (inner_type
));
60 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
61 unsigned HOST_WIDE_INT low
, high
, mask
;
66 if (width
== HOST_BITS_PER_WIDE_INT
)
70 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
71 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
74 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
75 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
76 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
78 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_DOUBLE_INT
)
83 ret
= build_int_cst_wide (type
, low
, high
);
87 static GTY(()) tree vector_inner_type
;
88 static GTY(()) tree vector_last_type
;
89 static GTY(()) int vector_last_nunits
;
91 /* Return a suitable vector types made of SUBPARTS units each of mode
92 "word_mode" (the global variable). */
94 build_word_mode_vector_type (int nunits
)
96 if (!vector_inner_type
)
97 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
98 else if (vector_last_nunits
== nunits
)
100 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
101 return vector_last_type
;
104 /* We build a new type, but we canonicalize it nevertheless,
105 because it still saves some memory. */
106 vector_last_nunits
= nunits
;
107 vector_last_type
= type_hash_canon (nunits
,
108 build_vector_type (vector_inner_type
,
110 return vector_last_type
;
113 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
114 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
117 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
118 tree t
, tree bitsize
, tree bitpos
)
121 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
123 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
127 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
128 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
131 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
132 return gimplify_build1 (gsi
, code
, inner_type
, a
);
136 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
137 tree bitpos
, tree bitsize
, enum tree_code code
)
139 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
140 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
141 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
142 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
143 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
146 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
148 INNER_TYPE is the type of A and B elements
150 returned expression is of signed integer type with the
151 size equal to the size of INNER_TYPE. */
153 do_compare (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
154 tree bitpos
, tree bitsize
, enum tree_code code
)
158 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
159 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
161 comp_type
= build_nonstandard_integer_type
162 (GET_MODE_BITSIZE (TYPE_MODE (inner_type
)), 0);
164 return gimplify_build3 (gsi
, COND_EXPR
, comp_type
,
165 fold_build2 (code
, boolean_type_node
, a
, b
),
166 build_int_cst (comp_type
, -1),
167 build_int_cst (comp_type
, 0));
170 /* Expand vector addition to scalars. This does bit twiddling
171 in order to increase parallelism:
173 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
176 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
177 (a ^ ~b) & 0x80808080
179 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
181 This optimization should be done only if 4 vector items or more
184 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
185 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
188 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
189 unsigned HOST_WIDE_INT max
;
190 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
192 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
193 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
194 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
196 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
197 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
199 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
200 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
201 if (code
== PLUS_EXPR
)
202 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
205 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
206 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
209 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
210 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
211 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
215 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
216 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
217 tree bitsize ATTRIBUTE_UNUSED
,
218 enum tree_code code ATTRIBUTE_UNUSED
)
220 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
222 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
224 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
225 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
226 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
228 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
230 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
231 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
232 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
233 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
234 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
237 /* Expand a vector operation to scalars, by using many operations
238 whose type is the vector type's inner type. */
240 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
241 tree type
, tree inner_type
,
242 tree a
, tree b
, enum tree_code code
)
244 vec
<constructor_elt
, va_gc
> *v
;
245 tree part_width
= TYPE_SIZE (inner_type
);
246 tree index
= bitsize_int (0);
247 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
248 int delta
= tree_to_uhwi (part_width
)
249 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type
)));
251 location_t loc
= gimple_location (gsi_stmt (*gsi
));
253 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi
)), type
))
254 warning_at (loc
, OPT_Wvector_operation_performance
,
255 "vector operation will be expanded piecewise");
257 warning_at (loc
, OPT_Wvector_operation_performance
,
258 "vector operation will be expanded in parallel");
260 vec_alloc (v
, (nunits
+ delta
- 1) / delta
);
261 for (i
= 0; i
< nunits
;
262 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
))
264 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
);
265 constructor_elt ce
= {NULL_TREE
, result
};
269 return build_constructor (type
, v
);
272 /* Expand a vector operation to scalars with the freedom to use
273 a scalar integer type, or to use a different size for the items
274 in the vector type. */
276 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
280 tree result
, compute_type
;
281 enum machine_mode mode
;
282 int n_words
= tree_to_uhwi (TYPE_SIZE_UNIT (type
)) / UNITS_PER_WORD
;
283 location_t loc
= gimple_location (gsi_stmt (*gsi
));
285 /* We have three strategies. If the type is already correct, just do
286 the operation an element at a time. Else, if the vector is wider than
287 one word, do it a word at a time; finally, if the vector is smaller
288 than one word, do it as a scalar. */
289 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
290 return expand_vector_piecewise (gsi
, f
,
291 type
, TREE_TYPE (type
),
293 else if (n_words
> 1)
295 tree word_type
= build_word_mode_vector_type (n_words
);
296 result
= expand_vector_piecewise (gsi
, f
,
297 word_type
, TREE_TYPE (word_type
),
299 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
304 /* Use a single scalar operation with a mode no wider than word_mode. */
305 mode
= mode_for_size (tree_to_uhwi (TYPE_SIZE (type
)), MODE_INT
, 0);
306 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
307 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
308 warning_at (loc
, OPT_Wvector_operation_performance
,
309 "vector operation will be expanded with a "
310 "single scalar operation");
316 /* Expand a vector operation to scalars; for integer types we can use
317 special bit twiddling tricks to do the sums a word at a time, using
318 function F_PARALLEL instead of F. These tricks are done only if
319 they can process at least four items, that is, only if the vector
320 holds at least four items and if a word can hold four items. */
322 expand_vector_addition (gimple_stmt_iterator
*gsi
,
323 elem_op_func f
, elem_op_func f_parallel
,
324 tree type
, tree a
, tree b
, enum tree_code code
)
326 int parts_per_word
= UNITS_PER_WORD
327 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
329 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
330 && parts_per_word
>= 4
331 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
332 return expand_vector_parallel (gsi
, f_parallel
,
335 return expand_vector_piecewise (gsi
, f
,
336 type
, TREE_TYPE (type
),
340 /* Try to expand vector comparison expression OP0 CODE OP1 by
341 querying optab if the following expression:
342 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
345 expand_vector_comparison (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
346 tree op1
, enum tree_code code
)
349 if (! expand_vec_cond_expr_p (type
, TREE_TYPE (op0
)))
350 t
= expand_vector_piecewise (gsi
, do_compare
, type
,
351 TREE_TYPE (TREE_TYPE (op0
)), op0
, op1
, code
);
358 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
359 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
360 the result if successful, otherwise return NULL_TREE. */
362 add_rshift (gimple_stmt_iterator
*gsi
, tree type
, tree op0
, int *shiftcnts
)
365 unsigned int i
, nunits
= TYPE_VECTOR_SUBPARTS (type
);
366 bool scalar_shift
= true;
368 for (i
= 1; i
< nunits
; i
++)
370 if (shiftcnts
[i
] != shiftcnts
[0])
371 scalar_shift
= false;
374 if (scalar_shift
&& shiftcnts
[0] == 0)
379 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_scalar
);
380 if (op
!= unknown_optab
381 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
382 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
383 build_int_cst (NULL_TREE
, shiftcnts
[0]));
386 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
387 if (op
!= unknown_optab
388 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
390 tree
*vec
= XALLOCAVEC (tree
, nunits
);
391 for (i
= 0; i
< nunits
; i
++)
392 vec
[i
] = build_int_cst (TREE_TYPE (type
), shiftcnts
[i
]);
393 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
394 build_vector (type
, vec
));
400 /* Try to expand integer vector division by constant using
401 widening multiply, shifts and additions. */
403 expand_vector_divmod (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
404 tree op1
, enum tree_code code
)
406 bool use_pow2
= true;
407 bool has_vector_shift
= true;
408 int mode
= -1, this_mode
;
409 int pre_shift
= -1, post_shift
;
410 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (type
);
411 int *shifts
= XALLOCAVEC (int, nunits
* 4);
412 int *pre_shifts
= shifts
+ nunits
;
413 int *post_shifts
= pre_shifts
+ nunits
;
414 int *shift_temps
= post_shifts
+ nunits
;
415 unsigned HOST_WIDE_INT
*mulc
= XALLOCAVEC (unsigned HOST_WIDE_INT
, nunits
);
416 int prec
= TYPE_PRECISION (TREE_TYPE (type
));
418 unsigned int i
, unsignedp
= TYPE_UNSIGNED (TREE_TYPE (type
));
419 unsigned HOST_WIDE_INT mask
= GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type
)));
421 tree cur_op
, mulcst
, tem
;
424 if (prec
> HOST_BITS_PER_WIDE_INT
)
427 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
428 if (op
== unknown_optab
429 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
430 has_vector_shift
= false;
432 /* Analysis phase. Determine if all op1 elements are either power
433 of two and it is possible to expand it using shifts (or for remainder
434 using masking). Additionally compute the multiplicative constants
435 and pre and post shifts if the division is to be expanded using
436 widening or high part multiplication plus shifts. */
437 for (i
= 0; i
< nunits
; i
++)
439 tree cst
= VECTOR_CST_ELT (op1
, i
);
440 unsigned HOST_WIDE_INT ml
;
442 if (TREE_CODE (cst
) != INTEGER_CST
|| integer_zerop (cst
))
448 && (!integer_pow2p (cst
) || tree_int_cst_sgn (cst
) != 1))
452 shifts
[i
] = tree_log2 (cst
);
453 if (shifts
[i
] != shifts
[0]
454 && code
== TRUNC_DIV_EXPR
455 && !has_vector_shift
)
462 unsigned HOST_WIDE_INT mh
;
463 unsigned HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
) & mask
;
465 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
466 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
475 /* Find a suitable multiplier and right shift count
476 instead of multiplying with D. */
477 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
479 /* If the suggested multiplier is more than SIZE bits, we can
480 do better for even divisors, using an initial right shift. */
481 if ((mh
!= 0 && (d
& 1) == 0)
482 || (!has_vector_shift
&& pre_shift
!= -1))
484 if (has_vector_shift
)
485 pre_shift
= floor_log2 (d
& -d
);
486 else if (pre_shift
== -1)
489 for (j
= 0; j
< nunits
; j
++)
491 tree cst2
= VECTOR_CST_ELT (op1
, j
);
492 unsigned HOST_WIDE_INT d2
;
495 if (!tree_fits_uhwi_p (cst2
))
497 d2
= tree_to_uhwi (cst2
) & mask
;
500 this_pre_shift
= floor_log2 (d2
& -d2
);
501 if (pre_shift
== -1 || this_pre_shift
< pre_shift
)
502 pre_shift
= this_pre_shift
;
504 if (i
!= 0 && pre_shift
!= 0)
514 if ((d
>> pre_shift
) <= 1)
519 mh
= choose_multiplier (d
>> pre_shift
, prec
,
521 &ml
, &post_shift
, &dummy_int
);
523 pre_shifts
[i
] = pre_shift
;
533 HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
);
534 unsigned HOST_WIDE_INT abs_d
;
539 /* Since d might be INT_MIN, we have to cast to
540 unsigned HOST_WIDE_INT before negating to avoid
541 undefined signed overflow. */
543 ? (unsigned HOST_WIDE_INT
) d
544 : - (unsigned HOST_WIDE_INT
) d
);
546 /* n rem d = n rem -d */
547 if (code
== TRUNC_MOD_EXPR
&& d
< 0)
549 else if (abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
551 /* This case is not handled correctly below. */
561 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
,
562 &post_shift
, &dummy_int
);
563 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
565 this_mode
= 4 + (d
< 0);
566 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
569 this_mode
= 2 + (d
< 0);
572 post_shifts
[i
] = post_shift
;
573 if ((i
&& !has_vector_shift
&& post_shifts
[0] != post_shift
)
574 || post_shift
>= prec
575 || pre_shifts
[i
] >= prec
)
580 else if (mode
!= this_mode
)
584 vec
= XALLOCAVEC (tree
, nunits
);
588 tree addend
= NULL_TREE
;
593 /* Both division and remainder sequences need
594 op0 < 0 ? mask : 0 computed. It can be either computed as
595 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
596 if none of the shifts is 0, or as the conditional. */
597 for (i
= 0; i
< nunits
; i
++)
601 = build_vector_type (build_nonstandard_integer_type (prec
, 1),
603 if (i
== nunits
&& TYPE_MODE (uns_type
) == TYPE_MODE (type
))
605 for (i
= 0; i
< nunits
; i
++)
606 shift_temps
[i
] = prec
- 1;
607 cur_op
= add_rshift (gsi
, type
, op0
, shift_temps
);
608 if (cur_op
!= NULL_TREE
)
610 cur_op
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
612 for (i
= 0; i
< nunits
; i
++)
613 shift_temps
[i
] = prec
- shifts
[i
];
614 cur_op
= add_rshift (gsi
, uns_type
, cur_op
, shift_temps
);
615 if (cur_op
!= NULL_TREE
)
616 addend
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
620 if (addend
== NULL_TREE
621 && expand_vec_cond_expr_p (type
, type
))
623 tree zero
, cst
, cond
;
626 zero
= build_zero_cst (type
);
627 cond
= build2 (LT_EXPR
, type
, op0
, zero
);
628 for (i
= 0; i
< nunits
; i
++)
629 vec
[i
] = build_int_cst (TREE_TYPE (type
),
630 ((unsigned HOST_WIDE_INT
) 1
632 cst
= build_vector (type
, vec
);
633 addend
= make_ssa_name (type
, NULL
);
634 stmt
= gimple_build_assign_with_ops (VEC_COND_EXPR
, addend
,
636 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
639 if (code
== TRUNC_DIV_EXPR
)
643 /* q = op0 >> shift; */
644 cur_op
= add_rshift (gsi
, type
, op0
, shifts
);
645 if (cur_op
!= NULL_TREE
)
648 else if (addend
!= NULL_TREE
)
650 /* t1 = op0 + addend;
652 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
653 if (op
!= unknown_optab
654 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
656 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
, addend
);
657 cur_op
= add_rshift (gsi
, type
, cur_op
, shifts
);
658 if (cur_op
!= NULL_TREE
)
666 for (i
= 0; i
< nunits
; i
++)
667 vec
[i
] = build_int_cst (TREE_TYPE (type
),
668 ((unsigned HOST_WIDE_INT
) 1
670 mask
= build_vector (type
, vec
);
671 op
= optab_for_tree_code (BIT_AND_EXPR
, type
, optab_default
);
672 if (op
!= unknown_optab
673 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
676 /* r = op0 & mask; */
677 return gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, op0
, mask
);
678 else if (addend
!= NULL_TREE
)
680 /* t1 = op0 + addend;
683 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
684 if (op
!= unknown_optab
685 && optab_handler (op
, TYPE_MODE (type
))
688 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
,
690 cur_op
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
,
692 op
= optab_for_tree_code (MINUS_EXPR
, type
,
694 if (op
!= unknown_optab
695 && optab_handler (op
, TYPE_MODE (type
))
697 return gimplify_build2 (gsi
, MINUS_EXPR
, type
,
705 if (mode
== -2 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
708 if (!can_mult_highpart_p (TYPE_MODE (type
), TYPE_UNSIGNED (type
)))
716 gcc_assert (unsignedp
);
717 /* t1 = oprnd0 >> pre_shift;
719 q = t2 >> post_shift; */
720 cur_op
= add_rshift (gsi
, type
, cur_op
, pre_shifts
);
721 if (cur_op
== NULL_TREE
)
725 gcc_assert (unsignedp
);
726 for (i
= 0; i
< nunits
; i
++)
736 gcc_assert (!unsignedp
);
737 for (i
= 0; i
< nunits
; i
++)
738 shift_temps
[i
] = prec
- 1;
744 for (i
= 0; i
< nunits
; i
++)
745 vec
[i
] = build_int_cst (TREE_TYPE (type
), mulc
[i
]);
746 mulcst
= build_vector (type
, vec
);
748 cur_op
= gimplify_build2 (gsi
, MULT_HIGHPART_EXPR
, type
, cur_op
, mulcst
);
753 /* t1 = oprnd0 >> pre_shift;
755 q = t2 >> post_shift; */
756 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
759 /* t1 = oprnd0 h* ml;
763 q = t4 >> (post_shift - 1); */
764 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
765 if (op
== unknown_optab
766 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
768 tem
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, cur_op
);
769 tem
= add_rshift (gsi
, type
, tem
, shift_temps
);
770 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
771 if (op
== unknown_optab
772 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
774 tem
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, tem
);
775 cur_op
= add_rshift (gsi
, type
, tem
, post_shifts
);
776 if (cur_op
== NULL_TREE
)
783 /* t1 = oprnd0 h* ml;
784 t2 = t1; [ iff (mode & 2) != 0 ]
785 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
786 t3 = t2 >> post_shift;
787 t4 = oprnd0 >> (prec - 1);
788 q = t3 - t4; [ iff (mode & 1) == 0 ]
789 q = t4 - t3; [ iff (mode & 1) != 0 ] */
792 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
793 if (op
== unknown_optab
794 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
796 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, op0
);
798 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
799 if (cur_op
== NULL_TREE
)
801 tem
= add_rshift (gsi
, type
, op0
, shift_temps
);
802 if (tem
== NULL_TREE
)
804 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
805 if (op
== unknown_optab
806 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
809 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, cur_op
, tem
);
811 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, tem
, cur_op
);
817 if (code
== TRUNC_DIV_EXPR
)
820 /* We divided. Now finish by:
823 op
= optab_for_tree_code (MULT_EXPR
, type
, optab_default
);
824 if (op
== unknown_optab
825 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
827 tem
= gimplify_build2 (gsi
, MULT_EXPR
, type
, cur_op
, op1
);
828 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
829 if (op
== unknown_optab
830 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
832 return gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, tem
);
835 /* Expand a vector condition to scalars, by using many conditions
836 on the vector's elements. */
838 expand_vector_condition (gimple_stmt_iterator
*gsi
)
840 gimple stmt
= gsi_stmt (*gsi
);
841 tree type
= gimple_expr_type (stmt
);
842 tree a
= gimple_assign_rhs1 (stmt
);
845 bool a_is_comparison
= false;
846 tree b
= gimple_assign_rhs2 (stmt
);
847 tree c
= gimple_assign_rhs3 (stmt
);
848 vec
<constructor_elt
, va_gc
> *v
;
850 tree inner_type
= TREE_TYPE (type
);
851 tree cond_type
= TREE_TYPE (TREE_TYPE (a
));
852 tree comp_inner_type
= cond_type
;
853 tree width
= TYPE_SIZE (inner_type
);
854 tree index
= bitsize_int (0);
855 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
857 location_t loc
= gimple_location (gsi_stmt (*gsi
));
859 if (!is_gimple_val (a
))
861 gcc_assert (COMPARISON_CLASS_P (a
));
862 a_is_comparison
= true;
863 a1
= TREE_OPERAND (a
, 0);
864 a2
= TREE_OPERAND (a
, 1);
865 comp_inner_type
= TREE_TYPE (TREE_TYPE (a1
));
868 if (expand_vec_cond_expr_p (type
, TREE_TYPE (a1
)))
871 /* TODO: try and find a smaller vector type. */
873 warning_at (loc
, OPT_Wvector_operation_performance
,
874 "vector condition will be expanded piecewise");
876 vec_alloc (v
, nunits
);
877 for (i
= 0; i
< nunits
;
878 i
++, index
= int_const_binop (PLUS_EXPR
, index
, width
))
881 tree bb
= tree_vec_extract (gsi
, inner_type
, b
, width
, index
);
882 tree cc
= tree_vec_extract (gsi
, inner_type
, c
, width
, index
);
885 tree aa1
= tree_vec_extract (gsi
, comp_inner_type
, a1
, width
, index
);
886 tree aa2
= tree_vec_extract (gsi
, comp_inner_type
, a2
, width
, index
);
887 aa
= build2 (TREE_CODE (a
), cond_type
, aa1
, aa2
);
890 aa
= tree_vec_extract (gsi
, cond_type
, a
, width
, index
);
891 result
= gimplify_build3 (gsi
, COND_EXPR
, inner_type
, aa
, bb
, cc
);
892 constructor_elt ce
= {NULL_TREE
, result
};
896 constr
= build_constructor (type
, v
);
897 gimple_assign_set_rhs_from_tree (gsi
, constr
);
898 update_stmt (gsi_stmt (*gsi
));
902 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
903 gimple assign
, enum tree_code code
)
905 enum machine_mode compute_mode
= TYPE_MODE (compute_type
);
907 /* If the compute mode is not a vector mode (hence we are not decomposing
908 a BLKmode vector to smaller, hardware-supported vectors), we may want
909 to expand the operations in parallel. */
910 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
911 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
912 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
913 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
914 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
915 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
920 if (!TYPE_OVERFLOW_TRAPS (type
))
921 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
922 gimple_assign_rhs1 (assign
),
923 gimple_assign_rhs2 (assign
), code
);
927 if (!TYPE_OVERFLOW_TRAPS (type
))
928 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
929 gimple_assign_rhs1 (assign
),
936 return expand_vector_parallel (gsi
, do_binop
, type
,
937 gimple_assign_rhs1 (assign
),
938 gimple_assign_rhs2 (assign
), code
);
941 return expand_vector_parallel (gsi
, do_unop
, type
,
942 gimple_assign_rhs1 (assign
),
959 tree rhs1
= gimple_assign_rhs1 (assign
);
960 tree rhs2
= gimple_assign_rhs2 (assign
);
962 return expand_vector_comparison (gsi
, type
, rhs1
, rhs2
, code
);
968 tree rhs1
= gimple_assign_rhs1 (assign
);
969 tree rhs2
= gimple_assign_rhs2 (assign
);
973 || !VECTOR_INTEGER_TYPE_P (type
)
974 || TREE_CODE (rhs2
) != VECTOR_CST
)
977 ret
= expand_vector_divmod (gsi
, type
, rhs1
, rhs2
, code
);
978 if (ret
!= NULL_TREE
)
987 if (TREE_CODE_CLASS (code
) == tcc_unary
)
988 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
989 gimple_assign_rhs1 (assign
),
992 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
993 gimple_assign_rhs1 (assign
),
994 gimple_assign_rhs2 (assign
), code
);
998 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1000 _9 = { b_7, b_7, b_7, b_7 };
1001 a_5 = _9 + { 0, 3, 6, 9 };
1002 because vector splat operation is usually more efficient
1003 than piecewise initialization of the vector. */
1006 optimize_vector_constructor (gimple_stmt_iterator
*gsi
)
1008 gimple stmt
= gsi_stmt (*gsi
);
1009 tree lhs
= gimple_assign_lhs (stmt
);
1010 tree rhs
= gimple_assign_rhs1 (stmt
);
1011 tree type
= TREE_TYPE (rhs
);
1012 unsigned int i
, j
, nelts
= TYPE_VECTOR_SUBPARTS (type
);
1013 bool all_same
= true;
1014 constructor_elt
*elt
;
1017 tree base
= NULL_TREE
;
1020 if (nelts
<= 2 || CONSTRUCTOR_NELTS (rhs
) != nelts
)
1022 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
1023 if (op
== unknown_optab
1024 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
1026 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
1027 if (TREE_CODE (elt
->value
) != SSA_NAME
1028 || TREE_CODE (TREE_TYPE (elt
->value
)) == VECTOR_TYPE
)
1032 tree this_base
= elt
->value
;
1033 if (this_base
!= CONSTRUCTOR_ELT (rhs
, 0)->value
)
1035 for (j
= 0; j
< nelts
+ 1; j
++)
1037 g
= SSA_NAME_DEF_STMT (this_base
);
1038 if (is_gimple_assign (g
)
1039 && gimple_assign_rhs_code (g
) == PLUS_EXPR
1040 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
1041 && TREE_CODE (gimple_assign_rhs1 (g
)) == SSA_NAME
1042 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
1043 this_base
= gimple_assign_rhs1 (g
);
1049 else if (this_base
!= base
)
1054 cst
= XALLOCAVEC (tree
, nelts
);
1055 for (i
= 0; i
< nelts
; i
++)
1057 tree this_base
= CONSTRUCTOR_ELT (rhs
, i
)->value
;;
1058 cst
[i
] = build_zero_cst (TREE_TYPE (base
));
1059 while (this_base
!= base
)
1061 g
= SSA_NAME_DEF_STMT (this_base
);
1062 cst
[i
] = fold_binary (PLUS_EXPR
, TREE_TYPE (base
),
1063 cst
[i
], gimple_assign_rhs2 (g
));
1064 if (cst
[i
] == NULL_TREE
1065 || TREE_CODE (cst
[i
]) != INTEGER_CST
1066 || TREE_OVERFLOW (cst
[i
]))
1068 this_base
= gimple_assign_rhs1 (g
);
1071 for (i
= 0; i
< nelts
; i
++)
1072 CONSTRUCTOR_ELT (rhs
, i
)->value
= base
;
1073 g
= gimple_build_assign (make_ssa_name (type
, NULL
), rhs
);
1074 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1075 g
= gimple_build_assign_with_ops (PLUS_EXPR
, lhs
, gimple_assign_lhs (g
),
1076 build_vector (type
, cst
));
1077 gsi_replace (gsi
, g
, false);
1080 /* Return a type for the widest vector mode whose components are of type
1081 TYPE, or NULL_TREE if none is found. */
1084 type_for_widest_vector_mode (tree type
, optab op
)
1086 enum machine_mode inner_mode
= TYPE_MODE (type
);
1087 enum machine_mode best_mode
= VOIDmode
, mode
;
1088 int best_nunits
= 0;
1090 if (SCALAR_FLOAT_MODE_P (inner_mode
))
1091 mode
= MIN_MODE_VECTOR_FLOAT
;
1092 else if (SCALAR_FRACT_MODE_P (inner_mode
))
1093 mode
= MIN_MODE_VECTOR_FRACT
;
1094 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
1095 mode
= MIN_MODE_VECTOR_UFRACT
;
1096 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
1097 mode
= MIN_MODE_VECTOR_ACCUM
;
1098 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
1099 mode
= MIN_MODE_VECTOR_UACCUM
;
1101 mode
= MIN_MODE_VECTOR_INT
;
1103 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
1104 if (GET_MODE_INNER (mode
) == inner_mode
1105 && GET_MODE_NUNITS (mode
) > best_nunits
1106 && optab_handler (op
, mode
) != CODE_FOR_nothing
)
1107 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
1109 if (best_mode
== VOIDmode
)
1112 return build_vector_type_for_mode (type
, best_mode
);
1116 /* Build a reference to the element of the vector VECT. Function
1117 returns either the element itself, either BIT_FIELD_REF, or an
1118 ARRAY_REF expression.
1120 GSI is required to insert temporary variables while building a
1121 refernece to the element of the vector VECT.
1123 PTMPVEC is a pointer to the temporary variable for caching
1124 purposes. In case when PTMPVEC is NULL new temporary variable
1127 vector_element (gimple_stmt_iterator
*gsi
, tree vect
, tree idx
, tree
*ptmpvec
)
1129 tree vect_type
, vect_elt_type
;
1133 bool need_asgn
= true;
1134 unsigned int elements
;
1136 vect_type
= TREE_TYPE (vect
);
1137 vect_elt_type
= TREE_TYPE (vect_type
);
1138 elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1140 if (TREE_CODE (idx
) == INTEGER_CST
)
1142 unsigned HOST_WIDE_INT index
;
1144 /* Given that we're about to compute a binary modulus,
1145 we don't care about the high bits of the value. */
1146 index
= TREE_INT_CST_LOW (idx
);
1147 if (!tree_fits_uhwi_p (idx
) || index
>= elements
)
1149 index
&= elements
- 1;
1150 idx
= build_int_cst (TREE_TYPE (idx
), index
);
1153 /* When lowering a vector statement sequence do some easy
1154 simplification by looking through intermediate vector results. */
1155 if (TREE_CODE (vect
) == SSA_NAME
)
1157 gimple def_stmt
= SSA_NAME_DEF_STMT (vect
);
1158 if (is_gimple_assign (def_stmt
)
1159 && (gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
1160 || gimple_assign_rhs_code (def_stmt
) == CONSTRUCTOR
))
1161 vect
= gimple_assign_rhs1 (def_stmt
);
1164 if (TREE_CODE (vect
) == VECTOR_CST
)
1165 return VECTOR_CST_ELT (vect
, index
);
1166 else if (TREE_CODE (vect
) == CONSTRUCTOR
1167 && (CONSTRUCTOR_NELTS (vect
) == 0
1168 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect
, 0)->value
))
1171 if (index
< CONSTRUCTOR_NELTS (vect
))
1172 return CONSTRUCTOR_ELT (vect
, index
)->value
;
1173 return build_zero_cst (vect_elt_type
);
1177 tree size
= TYPE_SIZE (vect_elt_type
);
1178 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (index
),
1180 return fold_build3 (BIT_FIELD_REF
, vect_elt_type
, vect
, size
, pos
);
1185 tmpvec
= create_tmp_var (vect_type
, "vectmp");
1187 tmpvec
= *ptmpvec
= create_tmp_var (vect_type
, "vectmp");
1196 TREE_ADDRESSABLE (tmpvec
) = 1;
1197 asgn
= gimple_build_assign (tmpvec
, vect
);
1198 gsi_insert_before (gsi
, asgn
, GSI_SAME_STMT
);
1201 arraytype
= build_array_type_nelts (vect_elt_type
, elements
);
1202 return build4 (ARRAY_REF
, vect_elt_type
,
1203 build1 (VIEW_CONVERT_EXPR
, arraytype
, tmpvec
),
1204 idx
, NULL_TREE
, NULL_TREE
);
1207 /* Check if VEC_PERM_EXPR within the given setting is supported
1208 by hardware, or lower it piecewise.
1210 When VEC_PERM_EXPR has the same first and second operands:
1211 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1212 {v0[mask[0]], v0[mask[1]], ...}
1213 MASK and V0 must have the same number of elements.
1215 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1216 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1217 V0 and V1 must have the same type. MASK, V0, V1 must have the
1218 same number of arguments. */
1221 lower_vec_perm (gimple_stmt_iterator
*gsi
)
1223 gimple stmt
= gsi_stmt (*gsi
);
1224 tree mask
= gimple_assign_rhs3 (stmt
);
1225 tree vec0
= gimple_assign_rhs1 (stmt
);
1226 tree vec1
= gimple_assign_rhs2 (stmt
);
1227 tree vect_type
= TREE_TYPE (vec0
);
1228 tree mask_type
= TREE_TYPE (mask
);
1229 tree vect_elt_type
= TREE_TYPE (vect_type
);
1230 tree mask_elt_type
= TREE_TYPE (mask_type
);
1231 unsigned int elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1232 vec
<constructor_elt
, va_gc
> *v
;
1233 tree constr
, t
, si
, i_val
;
1234 tree vec0tmp
= NULL_TREE
, vec1tmp
= NULL_TREE
, masktmp
= NULL_TREE
;
1235 bool two_operand_p
= !operand_equal_p (vec0
, vec1
, 0);
1236 location_t loc
= gimple_location (gsi_stmt (*gsi
));
1239 if (TREE_CODE (mask
) == SSA_NAME
)
1241 gimple def_stmt
= SSA_NAME_DEF_STMT (mask
);
1242 if (is_gimple_assign (def_stmt
)
1243 && gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
)
1244 mask
= gimple_assign_rhs1 (def_stmt
);
1247 if (TREE_CODE (mask
) == VECTOR_CST
)
1249 unsigned char *sel_int
= XALLOCAVEC (unsigned char, elements
);
1251 for (i
= 0; i
< elements
; ++i
)
1252 sel_int
[i
] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask
, i
))
1253 & (2 * elements
- 1));
1255 if (can_vec_perm_p (TYPE_MODE (vect_type
), false, sel_int
))
1257 gimple_assign_set_rhs3 (stmt
, mask
);
1262 else if (can_vec_perm_p (TYPE_MODE (vect_type
), true, NULL
))
1265 warning_at (loc
, OPT_Wvector_operation_performance
,
1266 "vector shuffling operation will be expanded piecewise");
1268 vec_alloc (v
, elements
);
1269 for (i
= 0; i
< elements
; i
++)
1272 i_val
= vector_element (gsi
, mask
, si
, &masktmp
);
1274 if (TREE_CODE (i_val
) == INTEGER_CST
)
1276 unsigned HOST_WIDE_INT index
;
1278 index
= TREE_INT_CST_LOW (i_val
);
1279 if (!tree_fits_uhwi_p (i_val
) || index
>= elements
)
1280 i_val
= build_int_cst (mask_elt_type
, index
& (elements
- 1));
1282 if (two_operand_p
&& (index
& elements
) != 0)
1283 t
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1285 t
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1287 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
1288 true, GSI_SAME_STMT
);
1292 tree cond
= NULL_TREE
, v0_val
;
1296 cond
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1297 build_int_cst (mask_elt_type
, elements
));
1298 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1299 true, GSI_SAME_STMT
);
1302 i_val
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1303 build_int_cst (mask_elt_type
, elements
- 1));
1304 i_val
= force_gimple_operand_gsi (gsi
, i_val
, true, NULL_TREE
,
1305 true, GSI_SAME_STMT
);
1307 v0_val
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1308 v0_val
= force_gimple_operand_gsi (gsi
, v0_val
, true, NULL_TREE
,
1309 true, GSI_SAME_STMT
);
1315 v1_val
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1316 v1_val
= force_gimple_operand_gsi (gsi
, v1_val
, true, NULL_TREE
,
1317 true, GSI_SAME_STMT
);
1319 cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1320 cond
, build_zero_cst (mask_elt_type
));
1321 cond
= fold_build3 (COND_EXPR
, vect_elt_type
,
1322 cond
, v0_val
, v1_val
);
1323 t
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1324 true, GSI_SAME_STMT
);
1330 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, t
);
1333 constr
= build_constructor (vect_type
, v
);
1334 gimple_assign_set_rhs_from_tree (gsi
, constr
);
1335 update_stmt (gsi_stmt (*gsi
));
1338 /* Process one statement. If we identify a vector operation, expand it. */
1341 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
1343 gimple stmt
= gsi_stmt (*gsi
);
1344 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
;
1345 enum tree_code code
;
1346 enum machine_mode compute_mode
;
1347 optab op
= unknown_optab
;
1348 enum gimple_rhs_class rhs_class
;
1351 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1354 code
= gimple_assign_rhs_code (stmt
);
1355 rhs_class
= get_gimple_rhs_class (code
);
1356 lhs
= gimple_assign_lhs (stmt
);
1358 if (code
== VEC_PERM_EXPR
)
1360 lower_vec_perm (gsi
);
1364 if (code
== VEC_COND_EXPR
)
1366 expand_vector_condition (gsi
);
1370 if (code
== CONSTRUCTOR
1371 && TREE_CODE (lhs
) == SSA_NAME
1372 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs
)))
1373 && !gimple_clobber_p (stmt
)
1376 optimize_vector_constructor (gsi
);
1380 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
1383 rhs1
= gimple_assign_rhs1 (stmt
);
1384 type
= gimple_expr_type (stmt
);
1385 if (rhs_class
== GIMPLE_BINARY_RHS
)
1386 rhs2
= gimple_assign_rhs2 (stmt
);
1388 if (TREE_CODE (type
) != VECTOR_TYPE
)
1391 if (code
== NOP_EXPR
1392 || code
== FLOAT_EXPR
1393 || code
== FIX_TRUNC_EXPR
1394 || code
== VIEW_CONVERT_EXPR
)
1397 gcc_assert (code
!= CONVERT_EXPR
);
1399 /* The signedness is determined from input argument. */
1400 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
1401 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
1402 type
= TREE_TYPE (rhs1
);
1404 /* For widening/narrowing vector operations, the relevant type is of the
1405 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1406 calculated in the same way above. */
1407 if (code
== WIDEN_SUM_EXPR
1408 || code
== VEC_WIDEN_MULT_HI_EXPR
1409 || code
== VEC_WIDEN_MULT_LO_EXPR
1410 || code
== VEC_WIDEN_MULT_EVEN_EXPR
1411 || code
== VEC_WIDEN_MULT_ODD_EXPR
1412 || code
== VEC_UNPACK_HI_EXPR
1413 || code
== VEC_UNPACK_LO_EXPR
1414 || code
== VEC_PACK_TRUNC_EXPR
1415 || code
== VEC_PACK_SAT_EXPR
1416 || code
== VEC_PACK_FIX_TRUNC_EXPR
1417 || code
== VEC_WIDEN_LSHIFT_HI_EXPR
1418 || code
== VEC_WIDEN_LSHIFT_LO_EXPR
)
1419 type
= TREE_TYPE (rhs1
);
1421 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1423 if (code
== LSHIFT_EXPR
1424 || code
== RSHIFT_EXPR
1425 || code
== LROTATE_EXPR
1426 || code
== RROTATE_EXPR
)
1430 /* Check whether we have vector <op> {x,x,x,x} where x
1431 could be a scalar variable or a constant. Transform
1432 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1433 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1438 if ((TREE_CODE (rhs2
) == VECTOR_CST
1439 && (first
= uniform_vector_p (rhs2
)) != NULL_TREE
)
1440 || (TREE_CODE (rhs2
) == SSA_NAME
1441 && (def_stmt
= SSA_NAME_DEF_STMT (rhs2
))
1442 && gimple_assign_single_p (def_stmt
)
1443 && (first
= uniform_vector_p
1444 (gimple_assign_rhs1 (def_stmt
))) != NULL_TREE
))
1446 gimple_assign_set_rhs2 (stmt
, first
);
1452 opv
= optab_for_tree_code (code
, type
, optab_vector
);
1453 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1457 op
= optab_for_tree_code (code
, type
, optab_scalar
);
1459 /* The rtl expander will expand vector/scalar as vector/vector
1460 if necessary. Don't bother converting the stmt here. */
1461 if (optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
1462 && optab_handler (opv
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
1467 op
= optab_for_tree_code (code
, type
, optab_default
);
1469 /* Optabs will try converting a negation into a subtraction, so
1470 look for it as well. TODO: negation of floating-point vectors
1471 might be turned into an exclusive OR toggling the sign bit. */
1472 if (op
== unknown_optab
1473 && code
== NEGATE_EXPR
1474 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
1475 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
1477 /* For very wide vectors, try using a smaller vector mode. */
1478 compute_type
= type
;
1479 if (!VECTOR_MODE_P (TYPE_MODE (type
)) && op
)
1481 tree vector_compute_type
1482 = type_for_widest_vector_mode (TREE_TYPE (type
), op
);
1483 if (vector_compute_type
!= NULL_TREE
1484 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
1485 < TYPE_VECTOR_SUBPARTS (compute_type
))
1486 && (optab_handler (op
, TYPE_MODE (vector_compute_type
))
1487 != CODE_FOR_nothing
))
1488 compute_type
= vector_compute_type
;
1491 /* If we are breaking a BLKmode vector into smaller pieces,
1492 type_for_widest_vector_mode has already looked into the optab,
1493 so skip these checks. */
1494 if (compute_type
== type
)
1496 compute_mode
= TYPE_MODE (compute_type
);
1497 if (VECTOR_MODE_P (compute_mode
))
1499 if (op
&& optab_handler (op
, compute_mode
) != CODE_FOR_nothing
)
1501 if (code
== MULT_HIGHPART_EXPR
1502 && can_mult_highpart_p (compute_mode
,
1503 TYPE_UNSIGNED (compute_type
)))
1506 /* There is no operation in hardware, so fall back to scalars. */
1507 compute_type
= TREE_TYPE (type
);
1510 gcc_assert (code
!= VEC_LSHIFT_EXPR
&& code
!= VEC_RSHIFT_EXPR
);
1511 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
1513 /* Leave expression untouched for later expansion. */
1514 if (new_rhs
== NULL_TREE
)
1517 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
1518 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
1521 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1522 way to do it is change expand_vector_operation and its callees to
1523 return a tree_code, RHS1 and RHS2 instead of a tree. */
1524 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
1525 update_stmt (gsi_stmt (*gsi
));
1528 /* Use this to lower vector operations introduced by the vectorizer,
1529 if it may need the bit-twiddling tricks implemented in this file. */
1532 gate_expand_vector_operations_ssa (void)
1534 return !(cfun
->curr_properties
& PROP_gimple_lvec
);
1538 expand_vector_operations (void)
1540 gimple_stmt_iterator gsi
;
1542 bool cfg_changed
= false;
1544 FOR_EACH_BB_FN (bb
, cfun
)
1546 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1548 expand_vector_operations_1 (&gsi
);
1549 /* ??? If we do not cleanup EH then we will ICE in
1550 verification. But in reality we have created wrong-code
1551 as we did not properly transition EH info and edges to
1552 the piecewise computations. */
1553 if (maybe_clean_eh_stmt (gsi_stmt (gsi
))
1554 && gimple_purge_dead_eh_edges (bb
))
1559 return cfg_changed
? TODO_cleanup_cfg
: 0;
1564 const pass_data pass_data_lower_vector
=
1566 GIMPLE_PASS
, /* type */
1567 "veclower", /* name */
1568 OPTGROUP_VEC
, /* optinfo_flags */
1569 true, /* has_gate */
1570 true, /* has_execute */
1571 TV_NONE
, /* tv_id */
1572 PROP_cfg
, /* properties_required */
1573 PROP_gimple_lvec
, /* properties_provided */
1574 0, /* properties_destroyed */
1575 0, /* todo_flags_start */
1576 ( TODO_update_ssa
| TODO_verify_ssa
1579 | TODO_cleanup_cfg
), /* todo_flags_finish */
1582 class pass_lower_vector
: public gimple_opt_pass
1585 pass_lower_vector (gcc::context
*ctxt
)
1586 : gimple_opt_pass (pass_data_lower_vector
, ctxt
)
1589 /* opt_pass methods: */
1590 bool gate () { return gate_expand_vector_operations_ssa (); }
1591 unsigned int execute () { return expand_vector_operations (); }
1593 }; // class pass_lower_vector
1598 make_pass_lower_vector (gcc::context
*ctxt
)
1600 return new pass_lower_vector (ctxt
);
1605 const pass_data pass_data_lower_vector_ssa
=
1607 GIMPLE_PASS
, /* type */
1608 "veclower2", /* name */
1609 OPTGROUP_VEC
, /* optinfo_flags */
1610 false, /* has_gate */
1611 true, /* has_execute */
1612 TV_NONE
, /* tv_id */
1613 PROP_cfg
, /* properties_required */
1614 PROP_gimple_lvec
, /* properties_provided */
1615 0, /* properties_destroyed */
1616 0, /* todo_flags_start */
1617 ( TODO_update_ssa
| TODO_verify_ssa
1620 | TODO_cleanup_cfg
), /* todo_flags_finish */
1623 class pass_lower_vector_ssa
: public gimple_opt_pass
1626 pass_lower_vector_ssa (gcc::context
*ctxt
)
1627 : gimple_opt_pass (pass_data_lower_vector_ssa
, ctxt
)
1630 /* opt_pass methods: */
1631 opt_pass
* clone () { return new pass_lower_vector_ssa (m_ctxt
); }
1632 unsigned int execute () { return expand_vector_operations (); }
1634 }; // class pass_lower_vector_ssa
1639 make_pass_lower_vector_ssa (gcc::context
*ctxt
)
1641 return new pass_lower_vector_ssa (ctxt
);
1644 #include "gt-tree-vect-generic.h"