[AArch64] PR target/68129: Define TARGET_SUPPORTS_WIDE_INT
[official-gcc.git] / gcc / tree-vect-generic.c
blobf82b6696a34e5cf04e4d350cbf551ddb219f4b61
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "tree-cfg.h"
41 static void expand_vector_operations_1 (gimple_stmt_iterator *);
44 /* Build a constant of type TYPE, made of VALUE's bits replicated
45 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
46 static tree
47 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
49 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
50 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
51 / HOST_BITS_PER_WIDE_INT;
52 unsigned HOST_WIDE_INT low, mask;
53 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
54 int i;
56 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
58 if (width == HOST_BITS_PER_WIDE_INT)
59 low = value;
60 else
62 mask = ((HOST_WIDE_INT)1 << width) - 1;
63 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
66 for (i = 0; i < n; i++)
67 a[i] = low;
69 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
70 return wide_int_to_tree
71 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
74 static GTY(()) tree vector_inner_type;
75 static GTY(()) tree vector_last_type;
76 static GTY(()) int vector_last_nunits;
78 /* Return a suitable vector types made of SUBPARTS units each of mode
79 "word_mode" (the global variable). */
80 static tree
81 build_word_mode_vector_type (int nunits)
83 if (!vector_inner_type)
84 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
85 else if (vector_last_nunits == nunits)
87 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
88 return vector_last_type;
91 /* We build a new type, but we canonicalize it nevertheless,
92 because it still saves some memory. */
93 vector_last_nunits = nunits;
94 vector_last_type = type_hash_canon (nunits,
95 build_vector_type (vector_inner_type,
96 nunits));
97 return vector_last_type;
100 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
101 tree, tree, tree, tree, tree, enum tree_code,
102 tree);
104 static inline tree
105 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
106 tree t, tree bitsize, tree bitpos)
108 if (bitpos)
110 if (TREE_CODE (type) == BOOLEAN_TYPE)
112 tree itype
113 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
114 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
115 bitsize, bitpos);
116 return gimplify_build2 (gsi, NE_EXPR, type, field,
117 build_zero_cst (itype));
119 else
120 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
122 else
123 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
126 static tree
127 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
128 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
129 enum tree_code code, tree type ATTRIBUTE_UNUSED)
131 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
132 return gimplify_build1 (gsi, code, inner_type, a);
135 static tree
136 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
137 tree bitpos, tree bitsize, enum tree_code code,
138 tree type ATTRIBUTE_UNUSED)
140 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
141 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
142 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
143 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
144 return gimplify_build2 (gsi, code, inner_type, a, b);
147 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
149 INNER_TYPE is the type of A and B elements
151 returned expression is of signed integer type with the
152 size equal to the size of INNER_TYPE. */
153 static tree
154 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
155 tree bitpos, tree bitsize, enum tree_code code, tree type)
157 tree stype = TREE_TYPE (type);
158 tree cst_false = build_zero_cst (stype);
159 tree cst_true = build_all_ones_cst (stype);
160 tree cmp;
162 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
163 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
165 cmp = build2 (code, boolean_type_node, a, b);
166 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
169 /* Expand vector addition to scalars. This does bit twiddling
170 in order to increase parallelism:
172 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
173 (a ^ b) & 0x80808080
175 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
176 (a ^ ~b) & 0x80808080
178 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
180 This optimization should be done only if 4 vector items or more
181 fit into a word. */
182 static tree
183 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
184 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
185 enum tree_code code, tree type ATTRIBUTE_UNUSED)
187 tree inner_type = TREE_TYPE (TREE_TYPE (a));
188 unsigned HOST_WIDE_INT max;
189 tree low_bits, high_bits, a_low, b_low, result_low, signs;
191 max = GET_MODE_MASK (TYPE_MODE (inner_type));
192 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
193 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
195 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
196 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
198 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
199 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
200 if (code == PLUS_EXPR)
201 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
202 else
204 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
205 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
208 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
209 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
210 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
213 static tree
214 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
215 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
216 tree bitsize ATTRIBUTE_UNUSED,
217 enum tree_code code ATTRIBUTE_UNUSED,
218 tree type ATTRIBUTE_UNUSED)
220 tree inner_type = TREE_TYPE (TREE_TYPE (b));
221 HOST_WIDE_INT max;
222 tree low_bits, high_bits, b_low, result_low, signs;
224 max = GET_MODE_MASK (TYPE_MODE (inner_type));
225 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
226 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
228 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
230 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
231 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
232 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
233 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
234 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
237 /* Expand a vector operation to scalars, by using many operations
238 whose type is the vector type's inner type. */
239 static tree
240 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
241 tree type, tree inner_type,
242 tree a, tree b, enum tree_code code)
244 vec<constructor_elt, va_gc> *v;
245 tree part_width = TYPE_SIZE (inner_type);
246 tree index = bitsize_int (0);
247 int nunits = TYPE_VECTOR_SUBPARTS (type);
248 int delta = tree_to_uhwi (part_width)
249 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
250 int i;
251 location_t loc = gimple_location (gsi_stmt (*gsi));
253 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
254 warning_at (loc, OPT_Wvector_operation_performance,
255 "vector operation will be expanded piecewise");
256 else
257 warning_at (loc, OPT_Wvector_operation_performance,
258 "vector operation will be expanded in parallel");
260 vec_alloc (v, (nunits + delta - 1) / delta);
261 for (i = 0; i < nunits;
262 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
264 tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
265 constructor_elt ce = {NULL_TREE, result};
266 v->quick_push (ce);
269 return build_constructor (type, v);
272 /* Expand a vector operation to scalars with the freedom to use
273 a scalar integer type, or to use a different size for the items
274 in the vector type. */
275 static tree
276 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
277 tree a, tree b,
278 enum tree_code code)
280 tree result, compute_type;
281 machine_mode mode;
282 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
283 location_t loc = gimple_location (gsi_stmt (*gsi));
285 /* We have three strategies. If the type is already correct, just do
286 the operation an element at a time. Else, if the vector is wider than
287 one word, do it a word at a time; finally, if the vector is smaller
288 than one word, do it as a scalar. */
289 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
290 return expand_vector_piecewise (gsi, f,
291 type, TREE_TYPE (type),
292 a, b, code);
293 else if (n_words > 1)
295 tree word_type = build_word_mode_vector_type (n_words);
296 result = expand_vector_piecewise (gsi, f,
297 word_type, TREE_TYPE (word_type),
298 a, b, code);
299 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
300 GSI_SAME_STMT);
302 else
304 /* Use a single scalar operation with a mode no wider than word_mode. */
305 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
306 compute_type = lang_hooks.types.type_for_mode (mode, 1);
307 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
308 warning_at (loc, OPT_Wvector_operation_performance,
309 "vector operation will be expanded with a "
310 "single scalar operation");
313 return result;
316 /* Expand a vector operation to scalars; for integer types we can use
317 special bit twiddling tricks to do the sums a word at a time, using
318 function F_PARALLEL instead of F. These tricks are done only if
319 they can process at least four items, that is, only if the vector
320 holds at least four items and if a word can hold four items. */
321 static tree
322 expand_vector_addition (gimple_stmt_iterator *gsi,
323 elem_op_func f, elem_op_func f_parallel,
324 tree type, tree a, tree b, enum tree_code code)
326 int parts_per_word = UNITS_PER_WORD
327 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
329 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
330 && parts_per_word >= 4
331 && TYPE_VECTOR_SUBPARTS (type) >= 4)
332 return expand_vector_parallel (gsi, f_parallel,
333 type, a, b, code);
334 else
335 return expand_vector_piecewise (gsi, f,
336 type, TREE_TYPE (type),
337 a, b, code);
340 /* Try to expand vector comparison expression OP0 CODE OP1 by
341 querying optab if the following expression:
342 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
343 can be expanded. */
344 static tree
345 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
346 tree op1, enum tree_code code)
348 tree t;
349 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
350 t = expand_vector_piecewise (gsi, do_compare, type,
351 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
352 else
353 t = NULL_TREE;
355 return t;
358 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
359 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
360 the result if successful, otherwise return NULL_TREE. */
361 static tree
362 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
364 optab op;
365 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
366 bool scalar_shift = true;
368 for (i = 1; i < nunits; i++)
370 if (shiftcnts[i] != shiftcnts[0])
371 scalar_shift = false;
374 if (scalar_shift && shiftcnts[0] == 0)
375 return op0;
377 if (scalar_shift)
379 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
380 if (op != unknown_optab
381 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
382 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
383 build_int_cst (NULL_TREE, shiftcnts[0]));
386 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
387 if (op != unknown_optab
388 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
390 tree *vec = XALLOCAVEC (tree, nunits);
391 for (i = 0; i < nunits; i++)
392 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
393 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
394 build_vector (type, vec));
397 return NULL_TREE;
400 /* Try to expand integer vector division by constant using
401 widening multiply, shifts and additions. */
402 static tree
403 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
404 tree op1, enum tree_code code)
406 bool use_pow2 = true;
407 bool has_vector_shift = true;
408 int mode = -1, this_mode;
409 int pre_shift = -1, post_shift;
410 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
411 int *shifts = XALLOCAVEC (int, nunits * 4);
412 int *pre_shifts = shifts + nunits;
413 int *post_shifts = pre_shifts + nunits;
414 int *shift_temps = post_shifts + nunits;
415 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
416 int prec = TYPE_PRECISION (TREE_TYPE (type));
417 int dummy_int;
418 unsigned int i;
419 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
420 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
421 tree *vec;
422 tree cur_op, mulcst, tem;
423 optab op;
425 if (prec > HOST_BITS_PER_WIDE_INT)
426 return NULL_TREE;
428 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
429 if (op == unknown_optab
430 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
431 has_vector_shift = false;
433 /* Analysis phase. Determine if all op1 elements are either power
434 of two and it is possible to expand it using shifts (or for remainder
435 using masking). Additionally compute the multiplicative constants
436 and pre and post shifts if the division is to be expanded using
437 widening or high part multiplication plus shifts. */
438 for (i = 0; i < nunits; i++)
440 tree cst = VECTOR_CST_ELT (op1, i);
441 unsigned HOST_WIDE_INT ml;
443 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
444 return NULL_TREE;
445 pre_shifts[i] = 0;
446 post_shifts[i] = 0;
447 mulc[i] = 0;
448 if (use_pow2
449 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
450 use_pow2 = false;
451 if (use_pow2)
453 shifts[i] = tree_log2 (cst);
454 if (shifts[i] != shifts[0]
455 && code == TRUNC_DIV_EXPR
456 && !has_vector_shift)
457 use_pow2 = false;
459 if (mode == -2)
460 continue;
461 if (sign_p == UNSIGNED)
463 unsigned HOST_WIDE_INT mh;
464 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
466 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
467 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
468 return NULL_TREE;
470 if (d <= 1)
472 mode = -2;
473 continue;
476 /* Find a suitable multiplier and right shift count
477 instead of multiplying with D. */
478 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
480 /* If the suggested multiplier is more than SIZE bits, we can
481 do better for even divisors, using an initial right shift. */
482 if ((mh != 0 && (d & 1) == 0)
483 || (!has_vector_shift && pre_shift != -1))
485 if (has_vector_shift)
486 pre_shift = floor_log2 (d & -d);
487 else if (pre_shift == -1)
489 unsigned int j;
490 for (j = 0; j < nunits; j++)
492 tree cst2 = VECTOR_CST_ELT (op1, j);
493 unsigned HOST_WIDE_INT d2;
494 int this_pre_shift;
496 if (!tree_fits_uhwi_p (cst2))
497 return NULL_TREE;
498 d2 = tree_to_uhwi (cst2) & mask;
499 if (d2 == 0)
500 return NULL_TREE;
501 this_pre_shift = floor_log2 (d2 & -d2);
502 if (pre_shift == -1 || this_pre_shift < pre_shift)
503 pre_shift = this_pre_shift;
505 if (i != 0 && pre_shift != 0)
507 /* Restart. */
508 i = -1U;
509 mode = -1;
510 continue;
513 if (pre_shift != 0)
515 if ((d >> pre_shift) <= 1)
517 mode = -2;
518 continue;
520 mh = choose_multiplier (d >> pre_shift, prec,
521 prec - pre_shift,
522 &ml, &post_shift, &dummy_int);
523 gcc_assert (!mh);
524 pre_shifts[i] = pre_shift;
527 if (!mh)
528 this_mode = 0;
529 else
530 this_mode = 1;
532 else
534 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
535 unsigned HOST_WIDE_INT abs_d;
537 if (d == -1)
538 return NULL_TREE;
540 /* Since d might be INT_MIN, we have to cast to
541 unsigned HOST_WIDE_INT before negating to avoid
542 undefined signed overflow. */
543 abs_d = (d >= 0
544 ? (unsigned HOST_WIDE_INT) d
545 : - (unsigned HOST_WIDE_INT) d);
547 /* n rem d = n rem -d */
548 if (code == TRUNC_MOD_EXPR && d < 0)
549 d = abs_d;
550 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
552 /* This case is not handled correctly below. */
553 mode = -2;
554 continue;
556 if (abs_d <= 1)
558 mode = -2;
559 continue;
562 choose_multiplier (abs_d, prec, prec - 1, &ml,
563 &post_shift, &dummy_int);
564 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
566 this_mode = 4 + (d < 0);
567 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
569 else
570 this_mode = 2 + (d < 0);
572 mulc[i] = ml;
573 post_shifts[i] = post_shift;
574 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
575 || post_shift >= prec
576 || pre_shifts[i] >= prec)
577 this_mode = -2;
579 if (i == 0)
580 mode = this_mode;
581 else if (mode != this_mode)
582 mode = -2;
585 vec = XALLOCAVEC (tree, nunits);
587 if (use_pow2)
589 tree addend = NULL_TREE;
590 if (sign_p == SIGNED)
592 tree uns_type;
594 /* Both division and remainder sequences need
595 op0 < 0 ? mask : 0 computed. It can be either computed as
596 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
597 if none of the shifts is 0, or as the conditional. */
598 for (i = 0; i < nunits; i++)
599 if (shifts[i] == 0)
600 break;
601 uns_type
602 = build_vector_type (build_nonstandard_integer_type (prec, 1),
603 nunits);
604 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
606 for (i = 0; i < nunits; i++)
607 shift_temps[i] = prec - 1;
608 cur_op = add_rshift (gsi, type, op0, shift_temps);
609 if (cur_op != NULL_TREE)
611 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
612 uns_type, cur_op);
613 for (i = 0; i < nunits; i++)
614 shift_temps[i] = prec - shifts[i];
615 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
616 if (cur_op != NULL_TREE)
617 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
618 type, cur_op);
621 if (addend == NULL_TREE
622 && expand_vec_cond_expr_p (type, type))
624 tree zero, cst, cond, mask_type;
625 gimple *stmt;
627 mask_type = build_same_sized_truth_vector_type (type);
628 zero = build_zero_cst (type);
629 cond = build2 (LT_EXPR, mask_type, op0, zero);
630 for (i = 0; i < nunits; i++)
631 vec[i] = build_int_cst (TREE_TYPE (type),
632 ((unsigned HOST_WIDE_INT) 1
633 << shifts[i]) - 1);
634 cst = build_vector (type, vec);
635 addend = make_ssa_name (type);
636 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
637 cst, zero);
638 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
641 if (code == TRUNC_DIV_EXPR)
643 if (sign_p == UNSIGNED)
645 /* q = op0 >> shift; */
646 cur_op = add_rshift (gsi, type, op0, shifts);
647 if (cur_op != NULL_TREE)
648 return cur_op;
650 else if (addend != NULL_TREE)
652 /* t1 = op0 + addend;
653 q = t1 >> shift; */
654 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
655 if (op != unknown_optab
656 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
658 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
659 cur_op = add_rshift (gsi, type, cur_op, shifts);
660 if (cur_op != NULL_TREE)
661 return cur_op;
665 else
667 tree mask;
668 for (i = 0; i < nunits; i++)
669 vec[i] = build_int_cst (TREE_TYPE (type),
670 ((unsigned HOST_WIDE_INT) 1
671 << shifts[i]) - 1);
672 mask = build_vector (type, vec);
673 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
674 if (op != unknown_optab
675 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
677 if (sign_p == UNSIGNED)
678 /* r = op0 & mask; */
679 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
680 else if (addend != NULL_TREE)
682 /* t1 = op0 + addend;
683 t2 = t1 & mask;
684 r = t2 - addend; */
685 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
686 if (op != unknown_optab
687 && optab_handler (op, TYPE_MODE (type))
688 != CODE_FOR_nothing)
690 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
691 addend);
692 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
693 cur_op, mask);
694 op = optab_for_tree_code (MINUS_EXPR, type,
695 optab_default);
696 if (op != unknown_optab
697 && optab_handler (op, TYPE_MODE (type))
698 != CODE_FOR_nothing)
699 return gimplify_build2 (gsi, MINUS_EXPR, type,
700 cur_op, addend);
707 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
708 return NULL_TREE;
710 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
711 return NULL_TREE;
713 cur_op = op0;
715 switch (mode)
717 case 0:
718 gcc_assert (sign_p == UNSIGNED);
719 /* t1 = oprnd0 >> pre_shift;
720 t2 = t1 h* ml;
721 q = t2 >> post_shift; */
722 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
723 if (cur_op == NULL_TREE)
724 return NULL_TREE;
725 break;
726 case 1:
727 gcc_assert (sign_p == UNSIGNED);
728 for (i = 0; i < nunits; i++)
730 shift_temps[i] = 1;
731 post_shifts[i]--;
733 break;
734 case 2:
735 case 3:
736 case 4:
737 case 5:
738 gcc_assert (sign_p == SIGNED);
739 for (i = 0; i < nunits; i++)
740 shift_temps[i] = prec - 1;
741 break;
742 default:
743 return NULL_TREE;
746 for (i = 0; i < nunits; i++)
747 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
748 mulcst = build_vector (type, vec);
750 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
752 switch (mode)
754 case 0:
755 /* t1 = oprnd0 >> pre_shift;
756 t2 = t1 h* ml;
757 q = t2 >> post_shift; */
758 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
759 break;
760 case 1:
761 /* t1 = oprnd0 h* ml;
762 t2 = oprnd0 - t1;
763 t3 = t2 >> 1;
764 t4 = t1 + t3;
765 q = t4 >> (post_shift - 1); */
766 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
767 if (op == unknown_optab
768 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
769 return NULL_TREE;
770 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
771 tem = add_rshift (gsi, type, tem, shift_temps);
772 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
773 if (op == unknown_optab
774 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
775 return NULL_TREE;
776 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
777 cur_op = add_rshift (gsi, type, tem, post_shifts);
778 if (cur_op == NULL_TREE)
779 return NULL_TREE;
780 break;
781 case 2:
782 case 3:
783 case 4:
784 case 5:
785 /* t1 = oprnd0 h* ml;
786 t2 = t1; [ iff (mode & 2) != 0 ]
787 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
788 t3 = t2 >> post_shift;
789 t4 = oprnd0 >> (prec - 1);
790 q = t3 - t4; [ iff (mode & 1) == 0 ]
791 q = t4 - t3; [ iff (mode & 1) != 0 ] */
792 if ((mode & 2) == 0)
794 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
795 if (op == unknown_optab
796 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
797 return NULL_TREE;
798 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
800 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
801 if (cur_op == NULL_TREE)
802 return NULL_TREE;
803 tem = add_rshift (gsi, type, op0, shift_temps);
804 if (tem == NULL_TREE)
805 return NULL_TREE;
806 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
807 if (op == unknown_optab
808 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
809 return NULL_TREE;
810 if ((mode & 1) == 0)
811 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
812 else
813 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
814 break;
815 default:
816 gcc_unreachable ();
819 if (code == TRUNC_DIV_EXPR)
820 return cur_op;
822 /* We divided. Now finish by:
823 t1 = q * oprnd1;
824 r = oprnd0 - t1; */
825 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
826 if (op == unknown_optab
827 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
828 return NULL_TREE;
829 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
830 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
831 if (op == unknown_optab
832 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
833 return NULL_TREE;
834 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
837 /* Expand a vector condition to scalars, by using many conditions
838 on the vector's elements. */
839 static void
840 expand_vector_condition (gimple_stmt_iterator *gsi)
842 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
843 tree type = gimple_expr_type (stmt);
844 tree a = gimple_assign_rhs1 (stmt);
845 tree a1 = a;
846 tree a2 = NULL_TREE;
847 bool a_is_comparison = false;
848 tree b = gimple_assign_rhs2 (stmt);
849 tree c = gimple_assign_rhs3 (stmt);
850 vec<constructor_elt, va_gc> *v;
851 tree constr;
852 tree inner_type = TREE_TYPE (type);
853 tree cond_type = TREE_TYPE (TREE_TYPE (a));
854 tree comp_inner_type = cond_type;
855 tree width = TYPE_SIZE (inner_type);
856 tree index = bitsize_int (0);
857 int nunits = TYPE_VECTOR_SUBPARTS (type);
858 int i;
859 location_t loc = gimple_location (gsi_stmt (*gsi));
861 if (!is_gimple_val (a))
863 gcc_assert (COMPARISON_CLASS_P (a));
864 a_is_comparison = true;
865 a1 = TREE_OPERAND (a, 0);
866 a2 = TREE_OPERAND (a, 1);
867 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
870 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
871 return;
873 /* TODO: try and find a smaller vector type. */
875 warning_at (loc, OPT_Wvector_operation_performance,
876 "vector condition will be expanded piecewise");
878 vec_alloc (v, nunits);
879 for (i = 0; i < nunits;
880 i++, index = int_const_binop (PLUS_EXPR, index, width))
882 tree aa, result;
883 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
884 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
885 if (a_is_comparison)
887 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
888 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
889 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
891 else
892 aa = tree_vec_extract (gsi, cond_type, a, width, index);
893 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
894 constructor_elt ce = {NULL_TREE, result};
895 v->quick_push (ce);
898 constr = build_constructor (type, v);
899 gimple_assign_set_rhs_from_tree (gsi, constr);
900 update_stmt (gsi_stmt (*gsi));
903 static tree
904 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
905 gassign *assign, enum tree_code code)
907 machine_mode compute_mode = TYPE_MODE (compute_type);
909 /* If the compute mode is not a vector mode (hence we are not decomposing
910 a BLKmode vector to smaller, hardware-supported vectors), we may want
911 to expand the operations in parallel. */
912 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
913 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
914 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
915 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
916 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
917 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
918 switch (code)
920 case PLUS_EXPR:
921 case MINUS_EXPR:
922 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
923 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
924 gimple_assign_rhs1 (assign),
925 gimple_assign_rhs2 (assign), code);
926 break;
928 case NEGATE_EXPR:
929 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
930 return expand_vector_addition (gsi, do_unop, do_negate, type,
931 gimple_assign_rhs1 (assign),
932 NULL_TREE, code);
933 break;
935 case BIT_AND_EXPR:
936 case BIT_IOR_EXPR:
937 case BIT_XOR_EXPR:
938 return expand_vector_parallel (gsi, do_binop, type,
939 gimple_assign_rhs1 (assign),
940 gimple_assign_rhs2 (assign), code);
942 case BIT_NOT_EXPR:
943 return expand_vector_parallel (gsi, do_unop, type,
944 gimple_assign_rhs1 (assign),
945 NULL_TREE, code);
946 case EQ_EXPR:
947 case NE_EXPR:
948 case GT_EXPR:
949 case LT_EXPR:
950 case GE_EXPR:
951 case LE_EXPR:
952 case UNEQ_EXPR:
953 case UNGT_EXPR:
954 case UNLT_EXPR:
955 case UNGE_EXPR:
956 case UNLE_EXPR:
957 case LTGT_EXPR:
958 case ORDERED_EXPR:
959 case UNORDERED_EXPR:
961 tree rhs1 = gimple_assign_rhs1 (assign);
962 tree rhs2 = gimple_assign_rhs2 (assign);
964 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
967 case TRUNC_DIV_EXPR:
968 case TRUNC_MOD_EXPR:
970 tree rhs1 = gimple_assign_rhs1 (assign);
971 tree rhs2 = gimple_assign_rhs2 (assign);
972 tree ret;
974 if (!optimize
975 || !VECTOR_INTEGER_TYPE_P (type)
976 || TREE_CODE (rhs2) != VECTOR_CST
977 || !VECTOR_MODE_P (TYPE_MODE (type)))
978 break;
980 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
981 if (ret != NULL_TREE)
982 return ret;
983 break;
986 default:
987 break;
990 if (TREE_CODE_CLASS (code) == tcc_unary)
991 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
992 gimple_assign_rhs1 (assign),
993 NULL_TREE, code);
994 else
995 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
996 gimple_assign_rhs1 (assign),
997 gimple_assign_rhs2 (assign), code);
1000 /* Try to optimize
1001 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1002 style stmts into:
1003 _9 = { b_7, b_7, b_7, b_7 };
1004 a_5 = _9 + { 0, 3, 6, 9 };
1005 because vector splat operation is usually more efficient
1006 than piecewise initialization of the vector. */
1008 static void
1009 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1011 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1012 tree lhs = gimple_assign_lhs (stmt);
1013 tree rhs = gimple_assign_rhs1 (stmt);
1014 tree type = TREE_TYPE (rhs);
1015 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1016 bool all_same = true;
1017 constructor_elt *elt;
1018 tree *cst;
1019 gimple *g;
1020 tree base = NULL_TREE;
1021 optab op;
1023 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1024 return;
1025 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1026 if (op == unknown_optab
1027 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1028 return;
1029 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1030 if (TREE_CODE (elt->value) != SSA_NAME
1031 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1032 return;
1033 else
1035 tree this_base = elt->value;
1036 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1037 all_same = false;
1038 for (j = 0; j < nelts + 1; j++)
1040 g = SSA_NAME_DEF_STMT (this_base);
1041 if (is_gimple_assign (g)
1042 && gimple_assign_rhs_code (g) == PLUS_EXPR
1043 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1044 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1045 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1046 this_base = gimple_assign_rhs1 (g);
1047 else
1048 break;
1050 if (i == 0)
1051 base = this_base;
1052 else if (this_base != base)
1053 return;
1055 if (all_same)
1056 return;
1057 cst = XALLOCAVEC (tree, nelts);
1058 for (i = 0; i < nelts; i++)
1060 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1061 cst[i] = build_zero_cst (TREE_TYPE (base));
1062 while (this_base != base)
1064 g = SSA_NAME_DEF_STMT (this_base);
1065 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1066 cst[i], gimple_assign_rhs2 (g));
1067 if (cst[i] == NULL_TREE
1068 || TREE_CODE (cst[i]) != INTEGER_CST
1069 || TREE_OVERFLOW (cst[i]))
1070 return;
1071 this_base = gimple_assign_rhs1 (g);
1074 for (i = 0; i < nelts; i++)
1075 CONSTRUCTOR_ELT (rhs, i)->value = base;
1076 g = gimple_build_assign (make_ssa_name (type), rhs);
1077 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1078 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1079 build_vector (type, cst));
1080 gsi_replace (gsi, g, false);
1083 /* Return a type for the widest vector mode whose components are of type
1084 TYPE, or NULL_TREE if none is found. */
1086 static tree
1087 type_for_widest_vector_mode (tree type, optab op)
1089 machine_mode inner_mode = TYPE_MODE (type);
1090 machine_mode best_mode = VOIDmode, mode;
1091 int best_nunits = 0;
1093 if (SCALAR_FLOAT_MODE_P (inner_mode))
1094 mode = MIN_MODE_VECTOR_FLOAT;
1095 else if (SCALAR_FRACT_MODE_P (inner_mode))
1096 mode = MIN_MODE_VECTOR_FRACT;
1097 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1098 mode = MIN_MODE_VECTOR_UFRACT;
1099 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1100 mode = MIN_MODE_VECTOR_ACCUM;
1101 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1102 mode = MIN_MODE_VECTOR_UACCUM;
1103 else
1104 mode = MIN_MODE_VECTOR_INT;
1106 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1107 if (GET_MODE_INNER (mode) == inner_mode
1108 && GET_MODE_NUNITS (mode) > best_nunits
1109 && optab_handler (op, mode) != CODE_FOR_nothing)
1110 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1112 if (best_mode == VOIDmode)
1113 return NULL_TREE;
1114 else
1115 return build_vector_type_for_mode (type, best_mode);
1119 /* Build a reference to the element of the vector VECT. Function
1120 returns either the element itself, either BIT_FIELD_REF, or an
1121 ARRAY_REF expression.
1123 GSI is required to insert temporary variables while building a
1124 refernece to the element of the vector VECT.
1126 PTMPVEC is a pointer to the temporary variable for caching
1127 purposes. In case when PTMPVEC is NULL new temporary variable
1128 will be created. */
1129 static tree
1130 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1132 tree vect_type, vect_elt_type;
1133 gimple *asgn;
1134 tree tmpvec;
1135 tree arraytype;
1136 bool need_asgn = true;
1137 unsigned int elements;
1139 vect_type = TREE_TYPE (vect);
1140 vect_elt_type = TREE_TYPE (vect_type);
1141 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1143 if (TREE_CODE (idx) == INTEGER_CST)
1145 unsigned HOST_WIDE_INT index;
1147 /* Given that we're about to compute a binary modulus,
1148 we don't care about the high bits of the value. */
1149 index = TREE_INT_CST_LOW (idx);
1150 if (!tree_fits_uhwi_p (idx) || index >= elements)
1152 index &= elements - 1;
1153 idx = build_int_cst (TREE_TYPE (idx), index);
1156 /* When lowering a vector statement sequence do some easy
1157 simplification by looking through intermediate vector results. */
1158 if (TREE_CODE (vect) == SSA_NAME)
1160 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1161 if (is_gimple_assign (def_stmt)
1162 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1163 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1164 vect = gimple_assign_rhs1 (def_stmt);
1167 if (TREE_CODE (vect) == VECTOR_CST)
1168 return VECTOR_CST_ELT (vect, index);
1169 else if (TREE_CODE (vect) == CONSTRUCTOR
1170 && (CONSTRUCTOR_NELTS (vect) == 0
1171 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1172 != VECTOR_TYPE))
1174 if (index < CONSTRUCTOR_NELTS (vect))
1175 return CONSTRUCTOR_ELT (vect, index)->value;
1176 return build_zero_cst (vect_elt_type);
1178 else
1180 tree size = TYPE_SIZE (vect_elt_type);
1181 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1182 size);
1183 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1187 if (!ptmpvec)
1188 tmpvec = create_tmp_var (vect_type, "vectmp");
1189 else if (!*ptmpvec)
1190 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1191 else
1193 tmpvec = *ptmpvec;
1194 need_asgn = false;
1197 if (need_asgn)
1199 TREE_ADDRESSABLE (tmpvec) = 1;
1200 asgn = gimple_build_assign (tmpvec, vect);
1201 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1204 arraytype = build_array_type_nelts (vect_elt_type, elements);
1205 return build4 (ARRAY_REF, vect_elt_type,
1206 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1207 idx, NULL_TREE, NULL_TREE);
1210 /* Check if VEC_PERM_EXPR within the given setting is supported
1211 by hardware, or lower it piecewise.
1213 When VEC_PERM_EXPR has the same first and second operands:
1214 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1215 {v0[mask[0]], v0[mask[1]], ...}
1216 MASK and V0 must have the same number of elements.
1218 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1219 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1220 V0 and V1 must have the same type. MASK, V0, V1 must have the
1221 same number of arguments. */
1223 static void
1224 lower_vec_perm (gimple_stmt_iterator *gsi)
1226 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1227 tree mask = gimple_assign_rhs3 (stmt);
1228 tree vec0 = gimple_assign_rhs1 (stmt);
1229 tree vec1 = gimple_assign_rhs2 (stmt);
1230 tree vect_type = TREE_TYPE (vec0);
1231 tree mask_type = TREE_TYPE (mask);
1232 tree vect_elt_type = TREE_TYPE (vect_type);
1233 tree mask_elt_type = TREE_TYPE (mask_type);
1234 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1235 vec<constructor_elt, va_gc> *v;
1236 tree constr, t, si, i_val;
1237 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1238 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1239 location_t loc = gimple_location (gsi_stmt (*gsi));
1240 unsigned i;
1242 if (TREE_CODE (mask) == SSA_NAME)
1244 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1245 if (is_gimple_assign (def_stmt)
1246 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1247 mask = gimple_assign_rhs1 (def_stmt);
1250 if (TREE_CODE (mask) == VECTOR_CST)
1252 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1254 for (i = 0; i < elements; ++i)
1255 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1256 & (2 * elements - 1));
1258 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1260 gimple_assign_set_rhs3 (stmt, mask);
1261 update_stmt (stmt);
1262 return;
1265 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1266 return;
1268 warning_at (loc, OPT_Wvector_operation_performance,
1269 "vector shuffling operation will be expanded piecewise");
1271 vec_alloc (v, elements);
1272 for (i = 0; i < elements; i++)
1274 si = size_int (i);
1275 i_val = vector_element (gsi, mask, si, &masktmp);
1277 if (TREE_CODE (i_val) == INTEGER_CST)
1279 unsigned HOST_WIDE_INT index;
1281 index = TREE_INT_CST_LOW (i_val);
1282 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1283 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1285 if (two_operand_p && (index & elements) != 0)
1286 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1287 else
1288 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1290 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1291 true, GSI_SAME_STMT);
1293 else
1295 tree cond = NULL_TREE, v0_val;
1297 if (two_operand_p)
1299 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1300 build_int_cst (mask_elt_type, elements));
1301 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1302 true, GSI_SAME_STMT);
1305 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1306 build_int_cst (mask_elt_type, elements - 1));
1307 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1308 true, GSI_SAME_STMT);
1310 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1311 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1312 true, GSI_SAME_STMT);
1314 if (two_operand_p)
1316 tree v1_val;
1318 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1319 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1320 true, GSI_SAME_STMT);
1322 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1323 cond, build_zero_cst (mask_elt_type));
1324 cond = fold_build3 (COND_EXPR, vect_elt_type,
1325 cond, v0_val, v1_val);
1326 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1327 true, GSI_SAME_STMT);
1329 else
1330 t = v0_val;
1333 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1336 constr = build_constructor (vect_type, v);
1337 gimple_assign_set_rhs_from_tree (gsi, constr);
1338 update_stmt (gsi_stmt (*gsi));
1341 /* If OP is a uniform vector return the element it is a splat from. */
1343 static tree
1344 ssa_uniform_vector_p (tree op)
1346 if (TREE_CODE (op) == VECTOR_CST
1347 || TREE_CODE (op) == CONSTRUCTOR)
1348 return uniform_vector_p (op);
1349 if (TREE_CODE (op) == SSA_NAME)
1351 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1352 if (gimple_assign_single_p (def_stmt))
1353 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1355 return NULL_TREE;
1358 /* Return type in which CODE operation with optab OP can be
1359 computed. */
1361 static tree
1362 get_compute_type (enum tree_code code, optab op, tree type)
1364 /* For very wide vectors, try using a smaller vector mode. */
1365 tree compute_type = type;
1366 if (op
1367 && (!VECTOR_MODE_P (TYPE_MODE (type))
1368 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1370 tree vector_compute_type
1371 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1372 if (vector_compute_type != NULL_TREE
1373 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1374 < TYPE_VECTOR_SUBPARTS (compute_type))
1375 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1376 != CODE_FOR_nothing))
1377 compute_type = vector_compute_type;
1380 /* If we are breaking a BLKmode vector into smaller pieces,
1381 type_for_widest_vector_mode has already looked into the optab,
1382 so skip these checks. */
1383 if (compute_type == type)
1385 machine_mode compute_mode = TYPE_MODE (compute_type);
1386 if (VECTOR_MODE_P (compute_mode))
1388 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1389 return compute_type;
1390 if (code == MULT_HIGHPART_EXPR
1391 && can_mult_highpart_p (compute_mode,
1392 TYPE_UNSIGNED (compute_type)))
1393 return compute_type;
1395 /* There is no operation in hardware, so fall back to scalars. */
1396 compute_type = TREE_TYPE (type);
1399 return compute_type;
1402 /* Helper function of expand_vector_operations_1. Return number of
1403 vector elements for vector types or 1 for other types. */
1405 static inline int
1406 count_type_subparts (tree type)
1408 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1411 static tree
1412 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1413 tree bitpos, tree bitsize, enum tree_code code,
1414 tree type ATTRIBUTE_UNUSED)
1416 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1417 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1418 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1419 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1420 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1421 return gimplify_build3 (gsi, code, inner_type, cond, a, b);
1424 /* Expand a vector COND_EXPR to scalars, piecewise. */
1425 static void
1426 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1428 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1429 tree type = gimple_expr_type (stmt);
1430 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1431 machine_mode compute_mode = TYPE_MODE (compute_type);
1432 gcc_assert (compute_mode != BLKmode);
1433 tree lhs = gimple_assign_lhs (stmt);
1434 tree rhs2 = gimple_assign_rhs2 (stmt);
1435 tree rhs3 = gimple_assign_rhs3 (stmt);
1436 tree new_rhs;
1438 /* If the compute mode is not a vector mode (hence we are not decomposing
1439 a BLKmode vector to smaller, hardware-supported vectors), we may want
1440 to expand the operations in parallel. */
1441 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1442 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1443 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1444 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1445 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1446 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1447 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1448 COND_EXPR);
1449 else
1450 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1451 rhs2, rhs3, COND_EXPR);
1452 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1453 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1454 new_rhs);
1456 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1457 way to do it is change expand_vector_operation and its callees to
1458 return a tree_code, RHS1 and RHS2 instead of a tree. */
1459 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1460 update_stmt (gsi_stmt (*gsi));
1463 /* Process one statement. If we identify a vector operation, expand it. */
1465 static void
1466 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1468 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1469 enum tree_code code;
1470 optab op = unknown_optab;
1471 enum gimple_rhs_class rhs_class;
1472 tree new_rhs;
1474 /* Only consider code == GIMPLE_ASSIGN. */
1475 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1476 if (!stmt)
1477 return;
1479 code = gimple_assign_rhs_code (stmt);
1480 rhs_class = get_gimple_rhs_class (code);
1481 lhs = gimple_assign_lhs (stmt);
1483 if (code == VEC_PERM_EXPR)
1485 lower_vec_perm (gsi);
1486 return;
1489 if (code == VEC_COND_EXPR)
1491 expand_vector_condition (gsi);
1492 return;
1495 if (code == COND_EXPR
1496 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1497 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1499 expand_vector_scalar_condition (gsi);
1500 return;
1503 if (code == CONSTRUCTOR
1504 && TREE_CODE (lhs) == SSA_NAME
1505 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1506 && !gimple_clobber_p (stmt)
1507 && optimize)
1509 optimize_vector_constructor (gsi);
1510 return;
1513 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1514 return;
1516 rhs1 = gimple_assign_rhs1 (stmt);
1517 type = gimple_expr_type (stmt);
1518 if (rhs_class == GIMPLE_BINARY_RHS)
1519 rhs2 = gimple_assign_rhs2 (stmt);
1521 if (TREE_CODE (type) != VECTOR_TYPE)
1522 return;
1524 /* If the vector operation is operating on all same vector elements
1525 implement it with a scalar operation and a splat if the target
1526 supports the scalar operation. */
1527 tree srhs1, srhs2 = NULL_TREE;
1528 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
1529 && (rhs2 == NULL_TREE
1530 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
1531 && (srhs2 = rhs2))
1532 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1533 /* As we query direct optabs restrict to non-convert operations. */
1534 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
1536 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
1537 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
1538 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
1540 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
1541 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
1542 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
1543 gimple_assign_set_rhs_from_tree (gsi,
1544 build_vector_from_val (type, slhs));
1545 update_stmt (stmt);
1546 return;
1550 /* A scalar operation pretending to be a vector one. */
1551 if (VECTOR_BOOLEAN_TYPE_P (type)
1552 && !VECTOR_MODE_P (TYPE_MODE (type))
1553 && TYPE_MODE (type) != BLKmode)
1554 return;
1556 if (CONVERT_EXPR_CODE_P (code)
1557 || code == FLOAT_EXPR
1558 || code == FIX_TRUNC_EXPR
1559 || code == VIEW_CONVERT_EXPR)
1560 return;
1562 /* The signedness is determined from input argument. */
1563 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1564 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1565 type = TREE_TYPE (rhs1);
1567 /* For widening/narrowing vector operations, the relevant type is of the
1568 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1569 calculated in the same way above. */
1570 if (code == WIDEN_SUM_EXPR
1571 || code == VEC_WIDEN_MULT_HI_EXPR
1572 || code == VEC_WIDEN_MULT_LO_EXPR
1573 || code == VEC_WIDEN_MULT_EVEN_EXPR
1574 || code == VEC_WIDEN_MULT_ODD_EXPR
1575 || code == VEC_UNPACK_HI_EXPR
1576 || code == VEC_UNPACK_LO_EXPR
1577 || code == VEC_PACK_TRUNC_EXPR
1578 || code == VEC_PACK_SAT_EXPR
1579 || code == VEC_PACK_FIX_TRUNC_EXPR
1580 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1581 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1582 type = TREE_TYPE (rhs1);
1584 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1585 scalar */
1586 if (code == LSHIFT_EXPR
1587 || code == RSHIFT_EXPR
1588 || code == LROTATE_EXPR
1589 || code == RROTATE_EXPR)
1591 optab opv;
1593 /* Check whether we have vector <op> {x,x,x,x} where x
1594 could be a scalar variable or a constant. Transform
1595 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1596 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1598 tree first;
1600 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1602 gimple_assign_set_rhs2 (stmt, first);
1603 update_stmt (stmt);
1604 rhs2 = first;
1608 opv = optab_for_tree_code (code, type, optab_vector);
1609 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1610 op = opv;
1611 else
1613 op = optab_for_tree_code (code, type, optab_scalar);
1615 compute_type = get_compute_type (code, op, type);
1616 if (compute_type == type)
1617 return;
1618 /* The rtl expander will expand vector/scalar as vector/vector
1619 if necessary. Pick one with wider vector type. */
1620 tree compute_vtype = get_compute_type (code, opv, type);
1621 if (count_type_subparts (compute_vtype)
1622 > count_type_subparts (compute_type))
1624 compute_type = compute_vtype;
1625 op = opv;
1629 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1631 if (compute_type == NULL_TREE)
1632 compute_type = get_compute_type (code, op, type);
1633 if (compute_type == type)
1634 return;
1635 /* Before splitting vector rotates into scalar rotates,
1636 see if we can't use vector shifts and BIT_IOR_EXPR
1637 instead. For vector by vector rotates we'd also
1638 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1639 for now, fold doesn't seem to create such rotates anyway. */
1640 if (compute_type == TREE_TYPE (type)
1641 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1643 optab oplv = vashl_optab, opl = ashl_optab;
1644 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1645 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1646 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1647 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1648 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1649 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1650 /* The rtl expander will expand vector/scalar as vector/vector
1651 if necessary. Pick one with wider vector type. */
1652 if (count_type_subparts (compute_lvtype)
1653 > count_type_subparts (compute_ltype))
1655 compute_ltype = compute_lvtype;
1656 opl = oplv;
1658 if (count_type_subparts (compute_rvtype)
1659 > count_type_subparts (compute_rtype))
1661 compute_rtype = compute_rvtype;
1662 opr = oprv;
1664 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1665 BIT_IOR_EXPR. */
1666 compute_type = compute_ltype;
1667 if (count_type_subparts (compute_type)
1668 > count_type_subparts (compute_rtype))
1669 compute_type = compute_rtype;
1670 if (count_type_subparts (compute_type)
1671 > count_type_subparts (compute_otype))
1672 compute_type = compute_otype;
1673 /* Verify all 3 operations can be performed in that type. */
1674 if (compute_type != TREE_TYPE (type))
1676 if (optab_handler (opl, TYPE_MODE (compute_type))
1677 == CODE_FOR_nothing
1678 || optab_handler (opr, TYPE_MODE (compute_type))
1679 == CODE_FOR_nothing
1680 || optab_handler (opo, TYPE_MODE (compute_type))
1681 == CODE_FOR_nothing)
1682 compute_type = TREE_TYPE (type);
1687 else
1688 op = optab_for_tree_code (code, type, optab_default);
1690 /* Optabs will try converting a negation into a subtraction, so
1691 look for it as well. TODO: negation of floating-point vectors
1692 might be turned into an exclusive OR toggling the sign bit. */
1693 if (op == unknown_optab
1694 && code == NEGATE_EXPR
1695 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1696 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1698 if (compute_type == NULL_TREE)
1699 compute_type = get_compute_type (code, op, type);
1700 if (compute_type == type)
1701 return;
1703 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1705 /* Leave expression untouched for later expansion. */
1706 if (new_rhs == NULL_TREE)
1707 return;
1709 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1710 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1711 new_rhs);
1713 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1714 way to do it is change expand_vector_operation and its callees to
1715 return a tree_code, RHS1 and RHS2 instead of a tree. */
1716 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1717 update_stmt (gsi_stmt (*gsi));
1720 /* Use this to lower vector operations introduced by the vectorizer,
1721 if it may need the bit-twiddling tricks implemented in this file. */
1723 static unsigned int
1724 expand_vector_operations (void)
1726 gimple_stmt_iterator gsi;
1727 basic_block bb;
1728 bool cfg_changed = false;
1730 FOR_EACH_BB_FN (bb, cfun)
1732 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1734 expand_vector_operations_1 (&gsi);
1735 /* ??? If we do not cleanup EH then we will ICE in
1736 verification. But in reality we have created wrong-code
1737 as we did not properly transition EH info and edges to
1738 the piecewise computations. */
1739 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1740 && gimple_purge_dead_eh_edges (bb))
1741 cfg_changed = true;
1745 return cfg_changed ? TODO_cleanup_cfg : 0;
1748 namespace {
1750 const pass_data pass_data_lower_vector =
1752 GIMPLE_PASS, /* type */
1753 "veclower", /* name */
1754 OPTGROUP_VEC, /* optinfo_flags */
1755 TV_NONE, /* tv_id */
1756 PROP_cfg, /* properties_required */
1757 PROP_gimple_lvec, /* properties_provided */
1758 0, /* properties_destroyed */
1759 0, /* todo_flags_start */
1760 TODO_update_ssa, /* todo_flags_finish */
1763 class pass_lower_vector : public gimple_opt_pass
1765 public:
1766 pass_lower_vector (gcc::context *ctxt)
1767 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1770 /* opt_pass methods: */
1771 virtual bool gate (function *fun)
1773 return !(fun->curr_properties & PROP_gimple_lvec);
1776 virtual unsigned int execute (function *)
1778 return expand_vector_operations ();
1781 }; // class pass_lower_vector
1783 } // anon namespace
1785 gimple_opt_pass *
1786 make_pass_lower_vector (gcc::context *ctxt)
1788 return new pass_lower_vector (ctxt);
1791 namespace {
1793 const pass_data pass_data_lower_vector_ssa =
1795 GIMPLE_PASS, /* type */
1796 "veclower2", /* name */
1797 OPTGROUP_VEC, /* optinfo_flags */
1798 TV_NONE, /* tv_id */
1799 PROP_cfg, /* properties_required */
1800 PROP_gimple_lvec, /* properties_provided */
1801 0, /* properties_destroyed */
1802 0, /* todo_flags_start */
1803 ( TODO_update_ssa
1804 | TODO_cleanup_cfg ), /* todo_flags_finish */
1807 class pass_lower_vector_ssa : public gimple_opt_pass
1809 public:
1810 pass_lower_vector_ssa (gcc::context *ctxt)
1811 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1814 /* opt_pass methods: */
1815 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1816 virtual unsigned int execute (function *)
1818 return expand_vector_operations ();
1821 }; // class pass_lower_vector_ssa
1823 } // anon namespace
1825 gimple_opt_pass *
1826 make_pass_lower_vector_ssa (gcc::context *ctxt)
1828 return new pass_lower_vector_ssa (ctxt);
1831 #include "gt-tree-vect-generic.h"