1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
26 #include "double-int.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
37 #include "langhooks.h"
39 #include "hard-reg-set.h"
41 #include "dominance.h"
43 #include "basic-block.h"
44 #include "tree-ssa-alias.h"
45 #include "internal-fn.h"
47 #include "gimple-expr.h"
50 #include "gimple-iterator.h"
51 #include "gimplify-me.h"
52 #include "gimple-ssa.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-iterator.h"
57 #include "tree-pass.h"
59 #include "diagnostic.h"
62 /* Need to include rtl.h, expr.h, etc. for optabs. */
65 #include "statistics.h"
67 #include "fixed-value.h"
68 #include "insn-config.h"
77 #include "insn-codes.h"
81 static void expand_vector_operations_1 (gimple_stmt_iterator
*);
84 /* Build a constant of type TYPE, made of VALUE's bits replicated
85 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
87 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
89 int width
= tree_to_uhwi (TYPE_SIZE (inner_type
));
90 int n
= (TYPE_PRECISION (type
) + HOST_BITS_PER_WIDE_INT
- 1)
91 / HOST_BITS_PER_WIDE_INT
;
92 unsigned HOST_WIDE_INT low
, mask
;
93 HOST_WIDE_INT a
[WIDE_INT_MAX_ELTS
];
96 gcc_assert (n
&& n
<= WIDE_INT_MAX_ELTS
);
98 if (width
== HOST_BITS_PER_WIDE_INT
)
102 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
103 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
106 for (i
= 0; i
< n
; i
++)
109 gcc_assert (TYPE_PRECISION (type
) <= MAX_BITSIZE_MODE_ANY_INT
);
110 return wide_int_to_tree
111 (type
, wide_int::from_array (a
, n
, TYPE_PRECISION (type
)));
114 static GTY(()) tree vector_inner_type
;
115 static GTY(()) tree vector_last_type
;
116 static GTY(()) int vector_last_nunits
;
118 /* Return a suitable vector types made of SUBPARTS units each of mode
119 "word_mode" (the global variable). */
121 build_word_mode_vector_type (int nunits
)
123 if (!vector_inner_type
)
124 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
125 else if (vector_last_nunits
== nunits
)
127 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
128 return vector_last_type
;
131 /* We build a new type, but we canonicalize it nevertheless,
132 because it still saves some memory. */
133 vector_last_nunits
= nunits
;
134 vector_last_type
= type_hash_canon (nunits
,
135 build_vector_type (vector_inner_type
,
137 return vector_last_type
;
140 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
141 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
144 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
145 tree t
, tree bitsize
, tree bitpos
)
148 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
150 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
154 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
155 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
158 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
159 return gimplify_build1 (gsi
, code
, inner_type
, a
);
163 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
164 tree bitpos
, tree bitsize
, enum tree_code code
)
166 if (TREE_CODE (TREE_TYPE (a
)) == VECTOR_TYPE
)
167 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
168 if (TREE_CODE (TREE_TYPE (b
)) == VECTOR_TYPE
)
169 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
170 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
173 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
175 INNER_TYPE is the type of A and B elements
177 returned expression is of signed integer type with the
178 size equal to the size of INNER_TYPE. */
180 do_compare (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
181 tree bitpos
, tree bitsize
, enum tree_code code
)
185 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
186 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
188 comp_type
= build_nonstandard_integer_type
189 (GET_MODE_BITSIZE (TYPE_MODE (inner_type
)), 0);
191 return gimplify_build3 (gsi
, COND_EXPR
, comp_type
,
192 fold_build2 (code
, boolean_type_node
, a
, b
),
193 build_int_cst (comp_type
, -1),
194 build_int_cst (comp_type
, 0));
197 /* Expand vector addition to scalars. This does bit twiddling
198 in order to increase parallelism:
200 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
203 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
204 (a ^ ~b) & 0x80808080
206 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
208 This optimization should be done only if 4 vector items or more
211 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
212 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
215 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
216 unsigned HOST_WIDE_INT max
;
217 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
219 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
220 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
221 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
223 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
224 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
226 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
227 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
228 if (code
== PLUS_EXPR
)
229 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
232 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
233 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
236 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
237 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
238 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
242 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
243 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
244 tree bitsize ATTRIBUTE_UNUSED
,
245 enum tree_code code ATTRIBUTE_UNUSED
)
247 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
249 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
251 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
252 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
253 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
255 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
257 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
258 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
259 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
260 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
261 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
264 /* Expand a vector operation to scalars, by using many operations
265 whose type is the vector type's inner type. */
267 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
268 tree type
, tree inner_type
,
269 tree a
, tree b
, enum tree_code code
)
271 vec
<constructor_elt
, va_gc
> *v
;
272 tree part_width
= TYPE_SIZE (inner_type
);
273 tree index
= bitsize_int (0);
274 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
275 int delta
= tree_to_uhwi (part_width
)
276 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type
)));
278 location_t loc
= gimple_location (gsi_stmt (*gsi
));
280 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi
)), type
))
281 warning_at (loc
, OPT_Wvector_operation_performance
,
282 "vector operation will be expanded piecewise");
284 warning_at (loc
, OPT_Wvector_operation_performance
,
285 "vector operation will be expanded in parallel");
287 vec_alloc (v
, (nunits
+ delta
- 1) / delta
);
288 for (i
= 0; i
< nunits
;
289 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
))
291 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
);
292 constructor_elt ce
= {NULL_TREE
, result
};
296 return build_constructor (type
, v
);
299 /* Expand a vector operation to scalars with the freedom to use
300 a scalar integer type, or to use a different size for the items
301 in the vector type. */
303 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
307 tree result
, compute_type
;
309 int n_words
= tree_to_uhwi (TYPE_SIZE_UNIT (type
)) / UNITS_PER_WORD
;
310 location_t loc
= gimple_location (gsi_stmt (*gsi
));
312 /* We have three strategies. If the type is already correct, just do
313 the operation an element at a time. Else, if the vector is wider than
314 one word, do it a word at a time; finally, if the vector is smaller
315 than one word, do it as a scalar. */
316 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
317 return expand_vector_piecewise (gsi
, f
,
318 type
, TREE_TYPE (type
),
320 else if (n_words
> 1)
322 tree word_type
= build_word_mode_vector_type (n_words
);
323 result
= expand_vector_piecewise (gsi
, f
,
324 word_type
, TREE_TYPE (word_type
),
326 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
331 /* Use a single scalar operation with a mode no wider than word_mode. */
332 mode
= mode_for_size (tree_to_uhwi (TYPE_SIZE (type
)), MODE_INT
, 0);
333 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
334 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
335 warning_at (loc
, OPT_Wvector_operation_performance
,
336 "vector operation will be expanded with a "
337 "single scalar operation");
343 /* Expand a vector operation to scalars; for integer types we can use
344 special bit twiddling tricks to do the sums a word at a time, using
345 function F_PARALLEL instead of F. These tricks are done only if
346 they can process at least four items, that is, only if the vector
347 holds at least four items and if a word can hold four items. */
349 expand_vector_addition (gimple_stmt_iterator
*gsi
,
350 elem_op_func f
, elem_op_func f_parallel
,
351 tree type
, tree a
, tree b
, enum tree_code code
)
353 int parts_per_word
= UNITS_PER_WORD
354 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
356 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
357 && parts_per_word
>= 4
358 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
359 return expand_vector_parallel (gsi
, f_parallel
,
362 return expand_vector_piecewise (gsi
, f
,
363 type
, TREE_TYPE (type
),
367 /* Try to expand vector comparison expression OP0 CODE OP1 by
368 querying optab if the following expression:
369 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
372 expand_vector_comparison (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
373 tree op1
, enum tree_code code
)
376 if (! expand_vec_cond_expr_p (type
, TREE_TYPE (op0
)))
377 t
= expand_vector_piecewise (gsi
, do_compare
, type
,
378 TREE_TYPE (TREE_TYPE (op0
)), op0
, op1
, code
);
385 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
386 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
387 the result if successful, otherwise return NULL_TREE. */
389 add_rshift (gimple_stmt_iterator
*gsi
, tree type
, tree op0
, int *shiftcnts
)
392 unsigned int i
, nunits
= TYPE_VECTOR_SUBPARTS (type
);
393 bool scalar_shift
= true;
395 for (i
= 1; i
< nunits
; i
++)
397 if (shiftcnts
[i
] != shiftcnts
[0])
398 scalar_shift
= false;
401 if (scalar_shift
&& shiftcnts
[0] == 0)
406 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_scalar
);
407 if (op
!= unknown_optab
408 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
409 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
410 build_int_cst (NULL_TREE
, shiftcnts
[0]));
413 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
414 if (op
!= unknown_optab
415 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
417 tree
*vec
= XALLOCAVEC (tree
, nunits
);
418 for (i
= 0; i
< nunits
; i
++)
419 vec
[i
] = build_int_cst (TREE_TYPE (type
), shiftcnts
[i
]);
420 return gimplify_build2 (gsi
, RSHIFT_EXPR
, type
, op0
,
421 build_vector (type
, vec
));
427 /* Try to expand integer vector division by constant using
428 widening multiply, shifts and additions. */
430 expand_vector_divmod (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
431 tree op1
, enum tree_code code
)
433 bool use_pow2
= true;
434 bool has_vector_shift
= true;
435 int mode
= -1, this_mode
;
436 int pre_shift
= -1, post_shift
;
437 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (type
);
438 int *shifts
= XALLOCAVEC (int, nunits
* 4);
439 int *pre_shifts
= shifts
+ nunits
;
440 int *post_shifts
= pre_shifts
+ nunits
;
441 int *shift_temps
= post_shifts
+ nunits
;
442 unsigned HOST_WIDE_INT
*mulc
= XALLOCAVEC (unsigned HOST_WIDE_INT
, nunits
);
443 int prec
= TYPE_PRECISION (TREE_TYPE (type
));
446 signop sign_p
= TYPE_SIGN (TREE_TYPE (type
));
447 unsigned HOST_WIDE_INT mask
= GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type
)));
449 tree cur_op
, mulcst
, tem
;
452 if (prec
> HOST_BITS_PER_WIDE_INT
)
455 op
= optab_for_tree_code (RSHIFT_EXPR
, type
, optab_vector
);
456 if (op
== unknown_optab
457 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
458 has_vector_shift
= false;
460 /* Analysis phase. Determine if all op1 elements are either power
461 of two and it is possible to expand it using shifts (or for remainder
462 using masking). Additionally compute the multiplicative constants
463 and pre and post shifts if the division is to be expanded using
464 widening or high part multiplication plus shifts. */
465 for (i
= 0; i
< nunits
; i
++)
467 tree cst
= VECTOR_CST_ELT (op1
, i
);
468 unsigned HOST_WIDE_INT ml
;
470 if (TREE_CODE (cst
) != INTEGER_CST
|| integer_zerop (cst
))
476 && (!integer_pow2p (cst
) || tree_int_cst_sgn (cst
) != 1))
480 shifts
[i
] = tree_log2 (cst
);
481 if (shifts
[i
] != shifts
[0]
482 && code
== TRUNC_DIV_EXPR
483 && !has_vector_shift
)
488 if (sign_p
== UNSIGNED
)
490 unsigned HOST_WIDE_INT mh
;
491 unsigned HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
) & mask
;
493 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
494 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
503 /* Find a suitable multiplier and right shift count
504 instead of multiplying with D. */
505 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
507 /* If the suggested multiplier is more than SIZE bits, we can
508 do better for even divisors, using an initial right shift. */
509 if ((mh
!= 0 && (d
& 1) == 0)
510 || (!has_vector_shift
&& pre_shift
!= -1))
512 if (has_vector_shift
)
513 pre_shift
= floor_log2 (d
& -d
);
514 else if (pre_shift
== -1)
517 for (j
= 0; j
< nunits
; j
++)
519 tree cst2
= VECTOR_CST_ELT (op1
, j
);
520 unsigned HOST_WIDE_INT d2
;
523 if (!tree_fits_uhwi_p (cst2
))
525 d2
= tree_to_uhwi (cst2
) & mask
;
528 this_pre_shift
= floor_log2 (d2
& -d2
);
529 if (pre_shift
== -1 || this_pre_shift
< pre_shift
)
530 pre_shift
= this_pre_shift
;
532 if (i
!= 0 && pre_shift
!= 0)
542 if ((d
>> pre_shift
) <= 1)
547 mh
= choose_multiplier (d
>> pre_shift
, prec
,
549 &ml
, &post_shift
, &dummy_int
);
551 pre_shifts
[i
] = pre_shift
;
561 HOST_WIDE_INT d
= TREE_INT_CST_LOW (cst
);
562 unsigned HOST_WIDE_INT abs_d
;
567 /* Since d might be INT_MIN, we have to cast to
568 unsigned HOST_WIDE_INT before negating to avoid
569 undefined signed overflow. */
571 ? (unsigned HOST_WIDE_INT
) d
572 : - (unsigned HOST_WIDE_INT
) d
);
574 /* n rem d = n rem -d */
575 if (code
== TRUNC_MOD_EXPR
&& d
< 0)
577 else if (abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
579 /* This case is not handled correctly below. */
589 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
,
590 &post_shift
, &dummy_int
);
591 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
593 this_mode
= 4 + (d
< 0);
594 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
597 this_mode
= 2 + (d
< 0);
600 post_shifts
[i
] = post_shift
;
601 if ((i
&& !has_vector_shift
&& post_shifts
[0] != post_shift
)
602 || post_shift
>= prec
603 || pre_shifts
[i
] >= prec
)
608 else if (mode
!= this_mode
)
612 vec
= XALLOCAVEC (tree
, nunits
);
616 tree addend
= NULL_TREE
;
617 if (sign_p
== SIGNED
)
621 /* Both division and remainder sequences need
622 op0 < 0 ? mask : 0 computed. It can be either computed as
623 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
624 if none of the shifts is 0, or as the conditional. */
625 for (i
= 0; i
< nunits
; i
++)
629 = build_vector_type (build_nonstandard_integer_type (prec
, 1),
631 if (i
== nunits
&& TYPE_MODE (uns_type
) == TYPE_MODE (type
))
633 for (i
= 0; i
< nunits
; i
++)
634 shift_temps
[i
] = prec
- 1;
635 cur_op
= add_rshift (gsi
, type
, op0
, shift_temps
);
636 if (cur_op
!= NULL_TREE
)
638 cur_op
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
640 for (i
= 0; i
< nunits
; i
++)
641 shift_temps
[i
] = prec
- shifts
[i
];
642 cur_op
= add_rshift (gsi
, uns_type
, cur_op
, shift_temps
);
643 if (cur_op
!= NULL_TREE
)
644 addend
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
,
648 if (addend
== NULL_TREE
649 && expand_vec_cond_expr_p (type
, type
))
651 tree zero
, cst
, cond
;
654 zero
= build_zero_cst (type
);
655 cond
= build2 (LT_EXPR
, type
, op0
, zero
);
656 for (i
= 0; i
< nunits
; i
++)
657 vec
[i
] = build_int_cst (TREE_TYPE (type
),
658 ((unsigned HOST_WIDE_INT
) 1
660 cst
= build_vector (type
, vec
);
661 addend
= make_ssa_name (type
);
662 stmt
= gimple_build_assign (addend
, VEC_COND_EXPR
, cond
,
664 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
667 if (code
== TRUNC_DIV_EXPR
)
669 if (sign_p
== UNSIGNED
)
671 /* q = op0 >> shift; */
672 cur_op
= add_rshift (gsi
, type
, op0
, shifts
);
673 if (cur_op
!= NULL_TREE
)
676 else if (addend
!= NULL_TREE
)
678 /* t1 = op0 + addend;
680 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
681 if (op
!= unknown_optab
682 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
684 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
, addend
);
685 cur_op
= add_rshift (gsi
, type
, cur_op
, shifts
);
686 if (cur_op
!= NULL_TREE
)
694 for (i
= 0; i
< nunits
; i
++)
695 vec
[i
] = build_int_cst (TREE_TYPE (type
),
696 ((unsigned HOST_WIDE_INT
) 1
698 mask
= build_vector (type
, vec
);
699 op
= optab_for_tree_code (BIT_AND_EXPR
, type
, optab_default
);
700 if (op
!= unknown_optab
701 && optab_handler (op
, TYPE_MODE (type
)) != CODE_FOR_nothing
)
703 if (sign_p
== UNSIGNED
)
704 /* r = op0 & mask; */
705 return gimplify_build2 (gsi
, BIT_AND_EXPR
, type
, op0
, mask
);
706 else if (addend
!= NULL_TREE
)
708 /* t1 = op0 + addend;
711 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
712 if (op
!= unknown_optab
713 && optab_handler (op
, TYPE_MODE (type
))
716 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, op0
,
718 cur_op
= gimplify_build2 (gsi
, BIT_AND_EXPR
, type
,
720 op
= optab_for_tree_code (MINUS_EXPR
, type
,
722 if (op
!= unknown_optab
723 && optab_handler (op
, TYPE_MODE (type
))
725 return gimplify_build2 (gsi
, MINUS_EXPR
, type
,
733 if (mode
== -2 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
736 if (!can_mult_highpart_p (TYPE_MODE (type
), TYPE_UNSIGNED (type
)))
744 gcc_assert (sign_p
== UNSIGNED
);
745 /* t1 = oprnd0 >> pre_shift;
747 q = t2 >> post_shift; */
748 cur_op
= add_rshift (gsi
, type
, cur_op
, pre_shifts
);
749 if (cur_op
== NULL_TREE
)
753 gcc_assert (sign_p
== UNSIGNED
);
754 for (i
= 0; i
< nunits
; i
++)
764 gcc_assert (sign_p
== SIGNED
);
765 for (i
= 0; i
< nunits
; i
++)
766 shift_temps
[i
] = prec
- 1;
772 for (i
= 0; i
< nunits
; i
++)
773 vec
[i
] = build_int_cst (TREE_TYPE (type
), mulc
[i
]);
774 mulcst
= build_vector (type
, vec
);
776 cur_op
= gimplify_build2 (gsi
, MULT_HIGHPART_EXPR
, type
, cur_op
, mulcst
);
781 /* t1 = oprnd0 >> pre_shift;
783 q = t2 >> post_shift; */
784 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
787 /* t1 = oprnd0 h* ml;
791 q = t4 >> (post_shift - 1); */
792 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
793 if (op
== unknown_optab
794 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
796 tem
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, cur_op
);
797 tem
= add_rshift (gsi
, type
, tem
, shift_temps
);
798 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
799 if (op
== unknown_optab
800 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
802 tem
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, tem
);
803 cur_op
= add_rshift (gsi
, type
, tem
, post_shifts
);
804 if (cur_op
== NULL_TREE
)
811 /* t1 = oprnd0 h* ml;
812 t2 = t1; [ iff (mode & 2) != 0 ]
813 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
814 t3 = t2 >> post_shift;
815 t4 = oprnd0 >> (prec - 1);
816 q = t3 - t4; [ iff (mode & 1) == 0 ]
817 q = t4 - t3; [ iff (mode & 1) != 0 ] */
820 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
821 if (op
== unknown_optab
822 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
824 cur_op
= gimplify_build2 (gsi
, PLUS_EXPR
, type
, cur_op
, op0
);
826 cur_op
= add_rshift (gsi
, type
, cur_op
, post_shifts
);
827 if (cur_op
== NULL_TREE
)
829 tem
= add_rshift (gsi
, type
, op0
, shift_temps
);
830 if (tem
== NULL_TREE
)
832 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
833 if (op
== unknown_optab
834 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
837 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, cur_op
, tem
);
839 cur_op
= gimplify_build2 (gsi
, MINUS_EXPR
, type
, tem
, cur_op
);
845 if (code
== TRUNC_DIV_EXPR
)
848 /* We divided. Now finish by:
851 op
= optab_for_tree_code (MULT_EXPR
, type
, optab_default
);
852 if (op
== unknown_optab
853 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
855 tem
= gimplify_build2 (gsi
, MULT_EXPR
, type
, cur_op
, op1
);
856 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
857 if (op
== unknown_optab
858 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
860 return gimplify_build2 (gsi
, MINUS_EXPR
, type
, op0
, tem
);
863 /* Expand a vector condition to scalars, by using many conditions
864 on the vector's elements. */
866 expand_vector_condition (gimple_stmt_iterator
*gsi
)
868 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
869 tree type
= gimple_expr_type (stmt
);
870 tree a
= gimple_assign_rhs1 (stmt
);
873 bool a_is_comparison
= false;
874 tree b
= gimple_assign_rhs2 (stmt
);
875 tree c
= gimple_assign_rhs3 (stmt
);
876 vec
<constructor_elt
, va_gc
> *v
;
878 tree inner_type
= TREE_TYPE (type
);
879 tree cond_type
= TREE_TYPE (TREE_TYPE (a
));
880 tree comp_inner_type
= cond_type
;
881 tree width
= TYPE_SIZE (inner_type
);
882 tree index
= bitsize_int (0);
883 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
885 location_t loc
= gimple_location (gsi_stmt (*gsi
));
887 if (!is_gimple_val (a
))
889 gcc_assert (COMPARISON_CLASS_P (a
));
890 a_is_comparison
= true;
891 a1
= TREE_OPERAND (a
, 0);
892 a2
= TREE_OPERAND (a
, 1);
893 comp_inner_type
= TREE_TYPE (TREE_TYPE (a1
));
896 if (expand_vec_cond_expr_p (type
, TREE_TYPE (a1
)))
899 /* TODO: try and find a smaller vector type. */
901 warning_at (loc
, OPT_Wvector_operation_performance
,
902 "vector condition will be expanded piecewise");
904 vec_alloc (v
, nunits
);
905 for (i
= 0; i
< nunits
;
906 i
++, index
= int_const_binop (PLUS_EXPR
, index
, width
))
909 tree bb
= tree_vec_extract (gsi
, inner_type
, b
, width
, index
);
910 tree cc
= tree_vec_extract (gsi
, inner_type
, c
, width
, index
);
913 tree aa1
= tree_vec_extract (gsi
, comp_inner_type
, a1
, width
, index
);
914 tree aa2
= tree_vec_extract (gsi
, comp_inner_type
, a2
, width
, index
);
915 aa
= build2 (TREE_CODE (a
), cond_type
, aa1
, aa2
);
918 aa
= tree_vec_extract (gsi
, cond_type
, a
, width
, index
);
919 result
= gimplify_build3 (gsi
, COND_EXPR
, inner_type
, aa
, bb
, cc
);
920 constructor_elt ce
= {NULL_TREE
, result
};
924 constr
= build_constructor (type
, v
);
925 gimple_assign_set_rhs_from_tree (gsi
, constr
);
926 update_stmt (gsi_stmt (*gsi
));
930 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
931 gassign
*assign
, enum tree_code code
)
933 machine_mode compute_mode
= TYPE_MODE (compute_type
);
935 /* If the compute mode is not a vector mode (hence we are not decomposing
936 a BLKmode vector to smaller, hardware-supported vectors), we may want
937 to expand the operations in parallel. */
938 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
939 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
940 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
941 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
942 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
943 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
948 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
949 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
950 gimple_assign_rhs1 (assign
),
951 gimple_assign_rhs2 (assign
), code
);
955 if (ANY_INTEGRAL_TYPE_P (type
) && !TYPE_OVERFLOW_TRAPS (type
))
956 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
957 gimple_assign_rhs1 (assign
),
964 return expand_vector_parallel (gsi
, do_binop
, type
,
965 gimple_assign_rhs1 (assign
),
966 gimple_assign_rhs2 (assign
), code
);
969 return expand_vector_parallel (gsi
, do_unop
, type
,
970 gimple_assign_rhs1 (assign
),
987 tree rhs1
= gimple_assign_rhs1 (assign
);
988 tree rhs2
= gimple_assign_rhs2 (assign
);
990 return expand_vector_comparison (gsi
, type
, rhs1
, rhs2
, code
);
996 tree rhs1
= gimple_assign_rhs1 (assign
);
997 tree rhs2
= gimple_assign_rhs2 (assign
);
1001 || !VECTOR_INTEGER_TYPE_P (type
)
1002 || TREE_CODE (rhs2
) != VECTOR_CST
1003 || !VECTOR_MODE_P (TYPE_MODE (type
)))
1006 ret
= expand_vector_divmod (gsi
, type
, rhs1
, rhs2
, code
);
1007 if (ret
!= NULL_TREE
)
1016 if (TREE_CODE_CLASS (code
) == tcc_unary
)
1017 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
1018 gimple_assign_rhs1 (assign
),
1021 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
1022 gimple_assign_rhs1 (assign
),
1023 gimple_assign_rhs2 (assign
), code
);
1027 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1029 _9 = { b_7, b_7, b_7, b_7 };
1030 a_5 = _9 + { 0, 3, 6, 9 };
1031 because vector splat operation is usually more efficient
1032 than piecewise initialization of the vector. */
1035 optimize_vector_constructor (gimple_stmt_iterator
*gsi
)
1037 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1038 tree lhs
= gimple_assign_lhs (stmt
);
1039 tree rhs
= gimple_assign_rhs1 (stmt
);
1040 tree type
= TREE_TYPE (rhs
);
1041 unsigned int i
, j
, nelts
= TYPE_VECTOR_SUBPARTS (type
);
1042 bool all_same
= true;
1043 constructor_elt
*elt
;
1046 tree base
= NULL_TREE
;
1049 if (nelts
<= 2 || CONSTRUCTOR_NELTS (rhs
) != nelts
)
1051 op
= optab_for_tree_code (PLUS_EXPR
, type
, optab_default
);
1052 if (op
== unknown_optab
1053 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
1055 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
1056 if (TREE_CODE (elt
->value
) != SSA_NAME
1057 || TREE_CODE (TREE_TYPE (elt
->value
)) == VECTOR_TYPE
)
1061 tree this_base
= elt
->value
;
1062 if (this_base
!= CONSTRUCTOR_ELT (rhs
, 0)->value
)
1064 for (j
= 0; j
< nelts
+ 1; j
++)
1066 g
= SSA_NAME_DEF_STMT (this_base
);
1067 if (is_gimple_assign (g
)
1068 && gimple_assign_rhs_code (g
) == PLUS_EXPR
1069 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
1070 && TREE_CODE (gimple_assign_rhs1 (g
)) == SSA_NAME
1071 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
1072 this_base
= gimple_assign_rhs1 (g
);
1078 else if (this_base
!= base
)
1083 cst
= XALLOCAVEC (tree
, nelts
);
1084 for (i
= 0; i
< nelts
; i
++)
1086 tree this_base
= CONSTRUCTOR_ELT (rhs
, i
)->value
;;
1087 cst
[i
] = build_zero_cst (TREE_TYPE (base
));
1088 while (this_base
!= base
)
1090 g
= SSA_NAME_DEF_STMT (this_base
);
1091 cst
[i
] = fold_binary (PLUS_EXPR
, TREE_TYPE (base
),
1092 cst
[i
], gimple_assign_rhs2 (g
));
1093 if (cst
[i
] == NULL_TREE
1094 || TREE_CODE (cst
[i
]) != INTEGER_CST
1095 || TREE_OVERFLOW (cst
[i
]))
1097 this_base
= gimple_assign_rhs1 (g
);
1100 for (i
= 0; i
< nelts
; i
++)
1101 CONSTRUCTOR_ELT (rhs
, i
)->value
= base
;
1102 g
= gimple_build_assign (make_ssa_name (type
), rhs
);
1103 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1104 g
= gimple_build_assign (lhs
, PLUS_EXPR
, gimple_assign_lhs (g
),
1105 build_vector (type
, cst
));
1106 gsi_replace (gsi
, g
, false);
1109 /* Return a type for the widest vector mode whose components are of type
1110 TYPE, or NULL_TREE if none is found. */
1113 type_for_widest_vector_mode (tree type
, optab op
)
1115 machine_mode inner_mode
= TYPE_MODE (type
);
1116 machine_mode best_mode
= VOIDmode
, mode
;
1117 int best_nunits
= 0;
1119 if (SCALAR_FLOAT_MODE_P (inner_mode
))
1120 mode
= MIN_MODE_VECTOR_FLOAT
;
1121 else if (SCALAR_FRACT_MODE_P (inner_mode
))
1122 mode
= MIN_MODE_VECTOR_FRACT
;
1123 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
1124 mode
= MIN_MODE_VECTOR_UFRACT
;
1125 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
1126 mode
= MIN_MODE_VECTOR_ACCUM
;
1127 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
1128 mode
= MIN_MODE_VECTOR_UACCUM
;
1130 mode
= MIN_MODE_VECTOR_INT
;
1132 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
1133 if (GET_MODE_INNER (mode
) == inner_mode
1134 && GET_MODE_NUNITS (mode
) > best_nunits
1135 && optab_handler (op
, mode
) != CODE_FOR_nothing
)
1136 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
1138 if (best_mode
== VOIDmode
)
1141 return build_vector_type_for_mode (type
, best_mode
);
1145 /* Build a reference to the element of the vector VECT. Function
1146 returns either the element itself, either BIT_FIELD_REF, or an
1147 ARRAY_REF expression.
1149 GSI is required to insert temporary variables while building a
1150 refernece to the element of the vector VECT.
1152 PTMPVEC is a pointer to the temporary variable for caching
1153 purposes. In case when PTMPVEC is NULL new temporary variable
1156 vector_element (gimple_stmt_iterator
*gsi
, tree vect
, tree idx
, tree
*ptmpvec
)
1158 tree vect_type
, vect_elt_type
;
1162 bool need_asgn
= true;
1163 unsigned int elements
;
1165 vect_type
= TREE_TYPE (vect
);
1166 vect_elt_type
= TREE_TYPE (vect_type
);
1167 elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1169 if (TREE_CODE (idx
) == INTEGER_CST
)
1171 unsigned HOST_WIDE_INT index
;
1173 /* Given that we're about to compute a binary modulus,
1174 we don't care about the high bits of the value. */
1175 index
= TREE_INT_CST_LOW (idx
);
1176 if (!tree_fits_uhwi_p (idx
) || index
>= elements
)
1178 index
&= elements
- 1;
1179 idx
= build_int_cst (TREE_TYPE (idx
), index
);
1182 /* When lowering a vector statement sequence do some easy
1183 simplification by looking through intermediate vector results. */
1184 if (TREE_CODE (vect
) == SSA_NAME
)
1186 gimple def_stmt
= SSA_NAME_DEF_STMT (vect
);
1187 if (is_gimple_assign (def_stmt
)
1188 && (gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
1189 || gimple_assign_rhs_code (def_stmt
) == CONSTRUCTOR
))
1190 vect
= gimple_assign_rhs1 (def_stmt
);
1193 if (TREE_CODE (vect
) == VECTOR_CST
)
1194 return VECTOR_CST_ELT (vect
, index
);
1195 else if (TREE_CODE (vect
) == CONSTRUCTOR
1196 && (CONSTRUCTOR_NELTS (vect
) == 0
1197 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect
, 0)->value
))
1200 if (index
< CONSTRUCTOR_NELTS (vect
))
1201 return CONSTRUCTOR_ELT (vect
, index
)->value
;
1202 return build_zero_cst (vect_elt_type
);
1206 tree size
= TYPE_SIZE (vect_elt_type
);
1207 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (index
),
1209 return fold_build3 (BIT_FIELD_REF
, vect_elt_type
, vect
, size
, pos
);
1214 tmpvec
= create_tmp_var (vect_type
, "vectmp");
1216 tmpvec
= *ptmpvec
= create_tmp_var (vect_type
, "vectmp");
1225 TREE_ADDRESSABLE (tmpvec
) = 1;
1226 asgn
= gimple_build_assign (tmpvec
, vect
);
1227 gsi_insert_before (gsi
, asgn
, GSI_SAME_STMT
);
1230 arraytype
= build_array_type_nelts (vect_elt_type
, elements
);
1231 return build4 (ARRAY_REF
, vect_elt_type
,
1232 build1 (VIEW_CONVERT_EXPR
, arraytype
, tmpvec
),
1233 idx
, NULL_TREE
, NULL_TREE
);
1236 /* Check if VEC_PERM_EXPR within the given setting is supported
1237 by hardware, or lower it piecewise.
1239 When VEC_PERM_EXPR has the same first and second operands:
1240 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1241 {v0[mask[0]], v0[mask[1]], ...}
1242 MASK and V0 must have the same number of elements.
1244 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1245 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1246 V0 and V1 must have the same type. MASK, V0, V1 must have the
1247 same number of arguments. */
1250 lower_vec_perm (gimple_stmt_iterator
*gsi
)
1252 gassign
*stmt
= as_a
<gassign
*> (gsi_stmt (*gsi
));
1253 tree mask
= gimple_assign_rhs3 (stmt
);
1254 tree vec0
= gimple_assign_rhs1 (stmt
);
1255 tree vec1
= gimple_assign_rhs2 (stmt
);
1256 tree vect_type
= TREE_TYPE (vec0
);
1257 tree mask_type
= TREE_TYPE (mask
);
1258 tree vect_elt_type
= TREE_TYPE (vect_type
);
1259 tree mask_elt_type
= TREE_TYPE (mask_type
);
1260 unsigned int elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
1261 vec
<constructor_elt
, va_gc
> *v
;
1262 tree constr
, t
, si
, i_val
;
1263 tree vec0tmp
= NULL_TREE
, vec1tmp
= NULL_TREE
, masktmp
= NULL_TREE
;
1264 bool two_operand_p
= !operand_equal_p (vec0
, vec1
, 0);
1265 location_t loc
= gimple_location (gsi_stmt (*gsi
));
1268 if (TREE_CODE (mask
) == SSA_NAME
)
1270 gimple def_stmt
= SSA_NAME_DEF_STMT (mask
);
1271 if (is_gimple_assign (def_stmt
)
1272 && gimple_assign_rhs_code (def_stmt
) == VECTOR_CST
)
1273 mask
= gimple_assign_rhs1 (def_stmt
);
1276 if (TREE_CODE (mask
) == VECTOR_CST
)
1278 unsigned char *sel_int
= XALLOCAVEC (unsigned char, elements
);
1280 for (i
= 0; i
< elements
; ++i
)
1281 sel_int
[i
] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask
, i
))
1282 & (2 * elements
- 1));
1284 if (can_vec_perm_p (TYPE_MODE (vect_type
), false, sel_int
))
1286 gimple_assign_set_rhs3 (stmt
, mask
);
1291 else if (can_vec_perm_p (TYPE_MODE (vect_type
), true, NULL
))
1294 warning_at (loc
, OPT_Wvector_operation_performance
,
1295 "vector shuffling operation will be expanded piecewise");
1297 vec_alloc (v
, elements
);
1298 for (i
= 0; i
< elements
; i
++)
1301 i_val
= vector_element (gsi
, mask
, si
, &masktmp
);
1303 if (TREE_CODE (i_val
) == INTEGER_CST
)
1305 unsigned HOST_WIDE_INT index
;
1307 index
= TREE_INT_CST_LOW (i_val
);
1308 if (!tree_fits_uhwi_p (i_val
) || index
>= elements
)
1309 i_val
= build_int_cst (mask_elt_type
, index
& (elements
- 1));
1311 if (two_operand_p
&& (index
& elements
) != 0)
1312 t
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1314 t
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1316 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
1317 true, GSI_SAME_STMT
);
1321 tree cond
= NULL_TREE
, v0_val
;
1325 cond
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1326 build_int_cst (mask_elt_type
, elements
));
1327 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1328 true, GSI_SAME_STMT
);
1331 i_val
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
1332 build_int_cst (mask_elt_type
, elements
- 1));
1333 i_val
= force_gimple_operand_gsi (gsi
, i_val
, true, NULL_TREE
,
1334 true, GSI_SAME_STMT
);
1336 v0_val
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
1337 v0_val
= force_gimple_operand_gsi (gsi
, v0_val
, true, NULL_TREE
,
1338 true, GSI_SAME_STMT
);
1344 v1_val
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
1345 v1_val
= force_gimple_operand_gsi (gsi
, v1_val
, true, NULL_TREE
,
1346 true, GSI_SAME_STMT
);
1348 cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1349 cond
, build_zero_cst (mask_elt_type
));
1350 cond
= fold_build3 (COND_EXPR
, vect_elt_type
,
1351 cond
, v0_val
, v1_val
);
1352 t
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
1353 true, GSI_SAME_STMT
);
1359 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, t
);
1362 constr
= build_constructor (vect_type
, v
);
1363 gimple_assign_set_rhs_from_tree (gsi
, constr
);
1364 update_stmt (gsi_stmt (*gsi
));
1367 /* Return type in which CODE operation with optab OP can be
1371 get_compute_type (enum tree_code code
, optab op
, tree type
)
1373 /* For very wide vectors, try using a smaller vector mode. */
1374 tree compute_type
= type
;
1376 && (!VECTOR_MODE_P (TYPE_MODE (type
))
1377 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
))
1379 tree vector_compute_type
1380 = type_for_widest_vector_mode (TREE_TYPE (type
), op
);
1381 if (vector_compute_type
!= NULL_TREE
1382 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
1383 < TYPE_VECTOR_SUBPARTS (compute_type
))
1384 && (optab_handler (op
, TYPE_MODE (vector_compute_type
))
1385 != CODE_FOR_nothing
))
1386 compute_type
= vector_compute_type
;
1389 /* If we are breaking a BLKmode vector into smaller pieces,
1390 type_for_widest_vector_mode has already looked into the optab,
1391 so skip these checks. */
1392 if (compute_type
== type
)
1394 machine_mode compute_mode
= TYPE_MODE (compute_type
);
1395 if (VECTOR_MODE_P (compute_mode
))
1397 if (op
&& optab_handler (op
, compute_mode
) != CODE_FOR_nothing
)
1398 return compute_type
;
1399 if (code
== MULT_HIGHPART_EXPR
1400 && can_mult_highpart_p (compute_mode
,
1401 TYPE_UNSIGNED (compute_type
)))
1402 return compute_type
;
1404 /* There is no operation in hardware, so fall back to scalars. */
1405 compute_type
= TREE_TYPE (type
);
1408 return compute_type
;
1411 /* Helper function of expand_vector_operations_1. Return number of
1412 vector elements for vector types or 1 for other types. */
1415 count_type_subparts (tree type
)
1417 return VECTOR_TYPE_P (type
) ? TYPE_VECTOR_SUBPARTS (type
) : 1;
1420 /* Process one statement. If we identify a vector operation, expand it. */
1423 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
1425 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
= NULL_TREE
;
1426 enum tree_code code
;
1427 optab op
= unknown_optab
;
1428 enum gimple_rhs_class rhs_class
;
1431 /* Only consider code == GIMPLE_ASSIGN. */
1432 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (*gsi
));
1436 code
= gimple_assign_rhs_code (stmt
);
1437 rhs_class
= get_gimple_rhs_class (code
);
1438 lhs
= gimple_assign_lhs (stmt
);
1440 if (code
== VEC_PERM_EXPR
)
1442 lower_vec_perm (gsi
);
1446 if (code
== VEC_COND_EXPR
)
1448 expand_vector_condition (gsi
);
1452 if (code
== CONSTRUCTOR
1453 && TREE_CODE (lhs
) == SSA_NAME
1454 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs
)))
1455 && !gimple_clobber_p (stmt
)
1458 optimize_vector_constructor (gsi
);
1462 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
1465 rhs1
= gimple_assign_rhs1 (stmt
);
1466 type
= gimple_expr_type (stmt
);
1467 if (rhs_class
== GIMPLE_BINARY_RHS
)
1468 rhs2
= gimple_assign_rhs2 (stmt
);
1470 if (TREE_CODE (type
) != VECTOR_TYPE
)
1473 if (CONVERT_EXPR_CODE_P (code
)
1474 || code
== FLOAT_EXPR
1475 || code
== FIX_TRUNC_EXPR
1476 || code
== VIEW_CONVERT_EXPR
)
1479 /* The signedness is determined from input argument. */
1480 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
1481 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
1482 type
= TREE_TYPE (rhs1
);
1484 /* For widening/narrowing vector operations, the relevant type is of the
1485 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1486 calculated in the same way above. */
1487 if (code
== WIDEN_SUM_EXPR
1488 || code
== VEC_WIDEN_MULT_HI_EXPR
1489 || code
== VEC_WIDEN_MULT_LO_EXPR
1490 || code
== VEC_WIDEN_MULT_EVEN_EXPR
1491 || code
== VEC_WIDEN_MULT_ODD_EXPR
1492 || code
== VEC_UNPACK_HI_EXPR
1493 || code
== VEC_UNPACK_LO_EXPR
1494 || code
== VEC_PACK_TRUNC_EXPR
1495 || code
== VEC_PACK_SAT_EXPR
1496 || code
== VEC_PACK_FIX_TRUNC_EXPR
1497 || code
== VEC_WIDEN_LSHIFT_HI_EXPR
1498 || code
== VEC_WIDEN_LSHIFT_LO_EXPR
)
1499 type
= TREE_TYPE (rhs1
);
1501 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1503 if (code
== LSHIFT_EXPR
1504 || code
== RSHIFT_EXPR
1505 || code
== LROTATE_EXPR
1506 || code
== RROTATE_EXPR
)
1510 /* Check whether we have vector <op> {x,x,x,x} where x
1511 could be a scalar variable or a constant. Transform
1512 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1513 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1518 if ((TREE_CODE (rhs2
) == VECTOR_CST
1519 && (first
= uniform_vector_p (rhs2
)) != NULL_TREE
)
1520 || (TREE_CODE (rhs2
) == SSA_NAME
1521 && (def_stmt
= SSA_NAME_DEF_STMT (rhs2
))
1522 && gimple_assign_single_p (def_stmt
)
1523 && (first
= uniform_vector_p
1524 (gimple_assign_rhs1 (def_stmt
))) != NULL_TREE
))
1526 gimple_assign_set_rhs2 (stmt
, first
);
1532 opv
= optab_for_tree_code (code
, type
, optab_vector
);
1533 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1537 op
= optab_for_tree_code (code
, type
, optab_scalar
);
1539 compute_type
= get_compute_type (code
, op
, type
);
1540 if (compute_type
== type
)
1542 /* The rtl expander will expand vector/scalar as vector/vector
1543 if necessary. Pick one with wider vector type. */
1544 tree compute_vtype
= get_compute_type (code
, opv
, type
);
1545 if (count_type_subparts (compute_vtype
)
1546 > count_type_subparts (compute_type
))
1548 compute_type
= compute_vtype
;
1553 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1555 if (compute_type
== NULL_TREE
)
1556 compute_type
= get_compute_type (code
, op
, type
);
1557 if (compute_type
== type
)
1559 /* Before splitting vector rotates into scalar rotates,
1560 see if we can't use vector shifts and BIT_IOR_EXPR
1561 instead. For vector by vector rotates we'd also
1562 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1563 for now, fold doesn't seem to create such rotates anyway. */
1564 if (compute_type
== TREE_TYPE (type
)
1565 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2
)))
1567 optab oplv
= vashl_optab
, opl
= ashl_optab
;
1568 optab oprv
= vlshr_optab
, opr
= lshr_optab
, opo
= ior_optab
;
1569 tree compute_lvtype
= get_compute_type (LSHIFT_EXPR
, oplv
, type
);
1570 tree compute_rvtype
= get_compute_type (RSHIFT_EXPR
, oprv
, type
);
1571 tree compute_otype
= get_compute_type (BIT_IOR_EXPR
, opo
, type
);
1572 tree compute_ltype
= get_compute_type (LSHIFT_EXPR
, opl
, type
);
1573 tree compute_rtype
= get_compute_type (RSHIFT_EXPR
, opr
, type
);
1574 /* The rtl expander will expand vector/scalar as vector/vector
1575 if necessary. Pick one with wider vector type. */
1576 if (count_type_subparts (compute_lvtype
)
1577 > count_type_subparts (compute_ltype
))
1579 compute_ltype
= compute_lvtype
;
1582 if (count_type_subparts (compute_rvtype
)
1583 > count_type_subparts (compute_rtype
))
1585 compute_rtype
= compute_rvtype
;
1588 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1590 compute_type
= compute_ltype
;
1591 if (count_type_subparts (compute_type
)
1592 > count_type_subparts (compute_rtype
))
1593 compute_type
= compute_rtype
;
1594 if (count_type_subparts (compute_type
)
1595 > count_type_subparts (compute_otype
))
1596 compute_type
= compute_otype
;
1597 /* Verify all 3 operations can be performed in that type. */
1598 if (compute_type
!= TREE_TYPE (type
))
1600 if (optab_handler (opl
, TYPE_MODE (compute_type
))
1602 || optab_handler (opr
, TYPE_MODE (compute_type
))
1604 || optab_handler (opo
, TYPE_MODE (compute_type
))
1605 == CODE_FOR_nothing
)
1606 compute_type
= TREE_TYPE (type
);
1612 op
= optab_for_tree_code (code
, type
, optab_default
);
1614 /* Optabs will try converting a negation into a subtraction, so
1615 look for it as well. TODO: negation of floating-point vectors
1616 might be turned into an exclusive OR toggling the sign bit. */
1617 if (op
== unknown_optab
1618 && code
== NEGATE_EXPR
1619 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
1620 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
1622 if (compute_type
== NULL_TREE
)
1623 compute_type
= get_compute_type (code
, op
, type
);
1624 if (compute_type
== type
)
1627 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
1629 /* Leave expression untouched for later expansion. */
1630 if (new_rhs
== NULL_TREE
)
1633 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
1634 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
1637 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1638 way to do it is change expand_vector_operation and its callees to
1639 return a tree_code, RHS1 and RHS2 instead of a tree. */
1640 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
1641 update_stmt (gsi_stmt (*gsi
));
1644 /* Use this to lower vector operations introduced by the vectorizer,
1645 if it may need the bit-twiddling tricks implemented in this file. */
1648 expand_vector_operations (void)
1650 gimple_stmt_iterator gsi
;
1652 bool cfg_changed
= false;
1654 FOR_EACH_BB_FN (bb
, cfun
)
1656 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1658 expand_vector_operations_1 (&gsi
);
1659 /* ??? If we do not cleanup EH then we will ICE in
1660 verification. But in reality we have created wrong-code
1661 as we did not properly transition EH info and edges to
1662 the piecewise computations. */
1663 if (maybe_clean_eh_stmt (gsi_stmt (gsi
))
1664 && gimple_purge_dead_eh_edges (bb
))
1669 return cfg_changed
? TODO_cleanup_cfg
: 0;
1674 const pass_data pass_data_lower_vector
=
1676 GIMPLE_PASS
, /* type */
1677 "veclower", /* name */
1678 OPTGROUP_VEC
, /* optinfo_flags */
1679 TV_NONE
, /* tv_id */
1680 PROP_cfg
, /* properties_required */
1681 PROP_gimple_lvec
, /* properties_provided */
1682 0, /* properties_destroyed */
1683 0, /* todo_flags_start */
1685 | TODO_cleanup_cfg
), /* todo_flags_finish */
1688 class pass_lower_vector
: public gimple_opt_pass
1691 pass_lower_vector (gcc::context
*ctxt
)
1692 : gimple_opt_pass (pass_data_lower_vector
, ctxt
)
1695 /* opt_pass methods: */
1696 virtual bool gate (function
*fun
)
1698 return !(fun
->curr_properties
& PROP_gimple_lvec
);
1701 virtual unsigned int execute (function
*)
1703 return expand_vector_operations ();
1706 }; // class pass_lower_vector
1711 make_pass_lower_vector (gcc::context
*ctxt
)
1713 return new pass_lower_vector (ctxt
);
1718 const pass_data pass_data_lower_vector_ssa
=
1720 GIMPLE_PASS
, /* type */
1721 "veclower2", /* name */
1722 OPTGROUP_VEC
, /* optinfo_flags */
1723 TV_NONE
, /* tv_id */
1724 PROP_cfg
, /* properties_required */
1725 PROP_gimple_lvec
, /* properties_provided */
1726 0, /* properties_destroyed */
1727 0, /* todo_flags_start */
1729 | TODO_cleanup_cfg
), /* todo_flags_finish */
1732 class pass_lower_vector_ssa
: public gimple_opt_pass
1735 pass_lower_vector_ssa (gcc::context
*ctxt
)
1736 : gimple_opt_pass (pass_data_lower_vector_ssa
, ctxt
)
1739 /* opt_pass methods: */
1740 opt_pass
* clone () { return new pass_lower_vector_ssa (m_ctxt
); }
1741 virtual unsigned int execute (function
*)
1743 return expand_vector_operations ();
1746 }; // class pass_lower_vector_ssa
1751 make_pass_lower_vector_ssa (gcc::context
*ctxt
)
1753 return new pass_lower_vector_ssa (ctxt
);
1756 #include "gt-tree-vect-generic.h"