/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "double-int.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "diagnostic.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "insn-codes.h"

static void expand_vector_operations_1 (gimple_stmt_iterator *);
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
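/* For example, with an 8-bit INNER_TYPE inside a 32-bit TYPE, the
   value 0x15 is replicated into 0x15151515: one copy of VALUE per
   TYPE_SIZE (INNER_TYPE) bits of the result.  */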
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
	  / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

static tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}
/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements

   returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
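/* For instance, with 8-bit elements and LT_EXPR, each lane of the
   result is 0xff (-1) where a[i] < b[i] holds and 0x00 where it
   does not, which is the mask form that vector comparisons
   produce.  */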
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
    (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           ((a ^ b) & 0x80808080)

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           ((a ^ ~b) & 0x80808080)

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
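/* Worked example with 8-bit lanes: for a = 0x7f and b = 0x01 in the
   same lane, masking keeps carries from crossing lane boundaries
   ((0x7f & 0x7f) + (0x01 & 0x7f) = 0x80), and the final XOR with
   ((a ^ b) & 0x80) = 0 restores the sign bit, giving the correct
   byte result 0x80.  */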
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */

static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  machine_mode mode;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
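  /* For instance, a V8QI operation on a 32-bit target takes the second
     branch below: TYPE_SIZE_UNIT is 8 and UNITS_PER_WORD is 4, so
     n_words is 2 and the work is done in two word-mode chunks.  */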
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
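/* For example, with 64-bit words a V16QI addition qualifies (a word
   holds eight 8-bit parts and the vector has sixteen), while a V2DI
   addition does not and is expanded one element at a time.  */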
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */

static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */

static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, vec));
    }

  return NULL_TREE;
}
/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
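/* As a concrete illustration of the technique: for 32-bit unsigned
   elements, x / 3 equals (x h* 0xAAAAAAAB) >> 1, where h* is the high
   half of the 64-bit product and 0xAAAAAAAB = ceil(2^33 / 3), so a
   multiply-high plus one shift replaces each division.  */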
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = floor_log2 (d & -d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type))
	    {
	      tree zero, cst, cond;
	      gimple stmt;

	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					((unsigned HOST_WIDE_INT) 1
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    ((unsigned HOST_WIDE_INT) 1
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
      return NULL_TREE;
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */

static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
	  aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);

      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  tree *cst;
  gimple g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (base == NULL_TREE)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  cst = XALLOCAVEC (tree, nelts);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      cst[i] = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
				cst[i], gimple_assign_rhs2 (g));
	  if (cst[i] == NULL_TREE
	      || TREE_CODE (cst[i]) != INTEGER_CST
	      || TREE_OVERFLOW (cst[i]))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   build_vector (type, cst));
  gsi_replace (gsi, g, false);
}
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */

static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */
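/* For example, with four-element vectors,
   VEC_PERM_EXPR <{10,11,12,13}, {20,21,22,23}, {0,5,2,7}>
   yields {10,21,12,23}: indices 0 and 2 select from the first
   operand, while 5 and 7 select elements 1 and 3 of the second.  */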
static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, cond, a, b);
}
/* Expand a vector COND_EXPR to scalars, piecewise.  */

static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN. */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
          op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"