Update ChangeLog and version files for release
[official-gcc.git] / gcc / tree-vect-generic.c
blobf33ff51fb112dcdd21eeb726d79240b4c78c0147
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "hash-set.h"
24 #include "machmode.h"
25 #include "vec.h"
26 #include "double-int.h"
27 #include "input.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "options.h"
31 #include "wide-int.h"
32 #include "inchash.h"
33 #include "tree.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "tm.h"
37 #include "langhooks.h"
38 #include "predict.h"
39 #include "hard-reg-set.h"
40 #include "function.h"
41 #include "dominance.h"
42 #include "cfg.h"
43 #include "basic-block.h"
44 #include "tree-ssa-alias.h"
45 #include "internal-fn.h"
46 #include "tree-eh.h"
47 #include "gimple-expr.h"
48 #include "is-a.h"
49 #include "gimple.h"
50 #include "gimple-iterator.h"
51 #include "gimplify-me.h"
52 #include "gimple-ssa.h"
53 #include "tree-cfg.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-iterator.h"
57 #include "tree-pass.h"
58 #include "flags.h"
59 #include "diagnostic.h"
60 #include "target.h"
62 /* Need to include rtl.h, expr.h, etc. for optabs. */
63 #include "hashtab.h"
64 #include "rtl.h"
65 #include "statistics.h"
66 #include "real.h"
67 #include "fixed-value.h"
68 #include "insn-config.h"
69 #include "expmed.h"
70 #include "dojump.h"
71 #include "explow.h"
72 #include "calls.h"
73 #include "emit-rtl.h"
74 #include "varasm.h"
75 #include "stmt.h"
76 #include "expr.h"
77 #include "insn-codes.h"
78 #include "optabs.h"
81 static void expand_vector_operations_1 (gimple_stmt_iterator *);
84 /* Build a constant of type TYPE, made of VALUE's bits replicated
85 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
86 static tree
87 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
89 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
90 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
91 / HOST_BITS_PER_WIDE_INT;
92 unsigned HOST_WIDE_INT low, mask;
93 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
94 int i;
96 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
98 if (width == HOST_BITS_PER_WIDE_INT)
99 low = value;
100 else
102 mask = ((HOST_WIDE_INT)1 << width) - 1;
103 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
106 for (i = 0; i < n; i++)
107 a[i] = low;
109 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
110 return wide_int_to_tree
111 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
114 static GTY(()) tree vector_inner_type;
115 static GTY(()) tree vector_last_type;
116 static GTY(()) int vector_last_nunits;
118 /* Return a suitable vector types made of SUBPARTS units each of mode
119 "word_mode" (the global variable). */
120 static tree
121 build_word_mode_vector_type (int nunits)
123 if (!vector_inner_type)
124 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
125 else if (vector_last_nunits == nunits)
127 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
128 return vector_last_type;
131 /* We build a new type, but we canonicalize it nevertheless,
132 because it still saves some memory. */
133 vector_last_nunits = nunits;
134 vector_last_type = type_hash_canon (nunits,
135 build_vector_type (vector_inner_type,
136 nunits));
137 return vector_last_type;
140 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
141 tree, tree, tree, tree, tree, enum tree_code);
143 static inline tree
144 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
145 tree t, tree bitsize, tree bitpos)
147 if (bitpos)
148 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
149 else
150 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
153 static tree
154 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
155 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
156 enum tree_code code)
158 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
159 return gimplify_build1 (gsi, code, inner_type, a);
162 static tree
163 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
164 tree bitpos, tree bitsize, enum tree_code code)
166 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
167 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
168 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
169 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
170 return gimplify_build2 (gsi, code, inner_type, a, b);
173 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
175 INNER_TYPE is the type of A and B elements
177 returned expression is of signed integer type with the
178 size equal to the size of INNER_TYPE. */
179 static tree
180 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
181 tree bitpos, tree bitsize, enum tree_code code)
183 tree comp_type;
185 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
186 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
188 comp_type = build_nonstandard_integer_type
189 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
191 return gimplify_build3 (gsi, COND_EXPR, comp_type,
192 fold_build2 (code, boolean_type_node, a, b),
193 build_int_cst (comp_type, -1),
194 build_int_cst (comp_type, 0));
197 /* Expand vector addition to scalars. This does bit twiddling
198 in order to increase parallelism:
200 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
201 (a ^ b) & 0x80808080
203 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
204 (a ^ ~b) & 0x80808080
206 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
208 This optimization should be done only if 4 vector items or more
209 fit into a word. */
210 static tree
211 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
212 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
213 enum tree_code code)
215 tree inner_type = TREE_TYPE (TREE_TYPE (a));
216 unsigned HOST_WIDE_INT max;
217 tree low_bits, high_bits, a_low, b_low, result_low, signs;
219 max = GET_MODE_MASK (TYPE_MODE (inner_type));
220 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
221 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
223 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
224 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
226 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
227 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
228 if (code == PLUS_EXPR)
229 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
230 else
232 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
233 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
236 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
237 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
238 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
241 static tree
242 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
243 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
244 tree bitsize ATTRIBUTE_UNUSED,
245 enum tree_code code ATTRIBUTE_UNUSED)
247 tree inner_type = TREE_TYPE (TREE_TYPE (b));
248 HOST_WIDE_INT max;
249 tree low_bits, high_bits, b_low, result_low, signs;
251 max = GET_MODE_MASK (TYPE_MODE (inner_type));
252 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
253 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
255 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
257 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
258 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
259 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
260 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
261 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
264 /* Expand a vector operation to scalars, by using many operations
265 whose type is the vector type's inner type. */
266 static tree
267 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
268 tree type, tree inner_type,
269 tree a, tree b, enum tree_code code)
271 vec<constructor_elt, va_gc> *v;
272 tree part_width = TYPE_SIZE (inner_type);
273 tree index = bitsize_int (0);
274 int nunits = TYPE_VECTOR_SUBPARTS (type);
275 int delta = tree_to_uhwi (part_width)
276 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
277 int i;
278 location_t loc = gimple_location (gsi_stmt (*gsi));
280 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
281 warning_at (loc, OPT_Wvector_operation_performance,
282 "vector operation will be expanded piecewise");
283 else
284 warning_at (loc, OPT_Wvector_operation_performance,
285 "vector operation will be expanded in parallel");
287 vec_alloc (v, (nunits + delta - 1) / delta);
288 for (i = 0; i < nunits;
289 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
291 tree result = f (gsi, inner_type, a, b, index, part_width, code);
292 constructor_elt ce = {NULL_TREE, result};
293 v->quick_push (ce);
296 return build_constructor (type, v);
299 /* Expand a vector operation to scalars with the freedom to use
300 a scalar integer type, or to use a different size for the items
301 in the vector type. */
302 static tree
303 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
304 tree a, tree b,
305 enum tree_code code)
307 tree result, compute_type;
308 machine_mode mode;
309 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
310 location_t loc = gimple_location (gsi_stmt (*gsi));
312 /* We have three strategies. If the type is already correct, just do
313 the operation an element at a time. Else, if the vector is wider than
314 one word, do it a word at a time; finally, if the vector is smaller
315 than one word, do it as a scalar. */
316 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
317 return expand_vector_piecewise (gsi, f,
318 type, TREE_TYPE (type),
319 a, b, code);
320 else if (n_words > 1)
322 tree word_type = build_word_mode_vector_type (n_words);
323 result = expand_vector_piecewise (gsi, f,
324 word_type, TREE_TYPE (word_type),
325 a, b, code);
326 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
327 GSI_SAME_STMT);
329 else
331 /* Use a single scalar operation with a mode no wider than word_mode. */
332 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
333 compute_type = lang_hooks.types.type_for_mode (mode, 1);
334 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
335 warning_at (loc, OPT_Wvector_operation_performance,
336 "vector operation will be expanded with a "
337 "single scalar operation");
340 return result;
343 /* Expand a vector operation to scalars; for integer types we can use
344 special bit twiddling tricks to do the sums a word at a time, using
345 function F_PARALLEL instead of F. These tricks are done only if
346 they can process at least four items, that is, only if the vector
347 holds at least four items and if a word can hold four items. */
348 static tree
349 expand_vector_addition (gimple_stmt_iterator *gsi,
350 elem_op_func f, elem_op_func f_parallel,
351 tree type, tree a, tree b, enum tree_code code)
353 int parts_per_word = UNITS_PER_WORD
354 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
356 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
357 && parts_per_word >= 4
358 && TYPE_VECTOR_SUBPARTS (type) >= 4)
359 return expand_vector_parallel (gsi, f_parallel,
360 type, a, b, code);
361 else
362 return expand_vector_piecewise (gsi, f,
363 type, TREE_TYPE (type),
364 a, b, code);
367 /* Try to expand vector comparison expression OP0 CODE OP1 by
368 querying optab if the following expression:
369 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
370 can be expanded. */
371 static tree
372 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
373 tree op1, enum tree_code code)
375 tree t;
376 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
377 t = expand_vector_piecewise (gsi, do_compare, type,
378 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
379 else
380 t = NULL_TREE;
382 return t;
385 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
386 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
387 the result if successful, otherwise return NULL_TREE. */
388 static tree
389 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
391 optab op;
392 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
393 bool scalar_shift = true;
395 for (i = 1; i < nunits; i++)
397 if (shiftcnts[i] != shiftcnts[0])
398 scalar_shift = false;
401 if (scalar_shift && shiftcnts[0] == 0)
402 return op0;
404 if (scalar_shift)
406 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
407 if (op != unknown_optab
408 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
409 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
410 build_int_cst (NULL_TREE, shiftcnts[0]));
413 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
414 if (op != unknown_optab
415 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
417 tree *vec = XALLOCAVEC (tree, nunits);
418 for (i = 0; i < nunits; i++)
419 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
420 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
421 build_vector (type, vec));
424 return NULL_TREE;
427 /* Try to expand integer vector division by constant using
428 widening multiply, shifts and additions. */
429 static tree
430 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
431 tree op1, enum tree_code code)
433 bool use_pow2 = true;
434 bool has_vector_shift = true;
435 int mode = -1, this_mode;
436 int pre_shift = -1, post_shift;
437 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
438 int *shifts = XALLOCAVEC (int, nunits * 4);
439 int *pre_shifts = shifts + nunits;
440 int *post_shifts = pre_shifts + nunits;
441 int *shift_temps = post_shifts + nunits;
442 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
443 int prec = TYPE_PRECISION (TREE_TYPE (type));
444 int dummy_int;
445 unsigned int i;
446 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
447 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
448 tree *vec;
449 tree cur_op, mulcst, tem;
450 optab op;
452 if (prec > HOST_BITS_PER_WIDE_INT)
453 return NULL_TREE;
455 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
456 if (op == unknown_optab
457 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
458 has_vector_shift = false;
460 /* Analysis phase. Determine if all op1 elements are either power
461 of two and it is possible to expand it using shifts (or for remainder
462 using masking). Additionally compute the multiplicative constants
463 and pre and post shifts if the division is to be expanded using
464 widening or high part multiplication plus shifts. */
465 for (i = 0; i < nunits; i++)
467 tree cst = VECTOR_CST_ELT (op1, i);
468 unsigned HOST_WIDE_INT ml;
470 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
471 return NULL_TREE;
472 pre_shifts[i] = 0;
473 post_shifts[i] = 0;
474 mulc[i] = 0;
475 if (use_pow2
476 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
477 use_pow2 = false;
478 if (use_pow2)
480 shifts[i] = tree_log2 (cst);
481 if (shifts[i] != shifts[0]
482 && code == TRUNC_DIV_EXPR
483 && !has_vector_shift)
484 use_pow2 = false;
486 if (mode == -2)
487 continue;
488 if (sign_p == UNSIGNED)
490 unsigned HOST_WIDE_INT mh;
491 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
493 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
494 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
495 return NULL_TREE;
497 if (d <= 1)
499 mode = -2;
500 continue;
503 /* Find a suitable multiplier and right shift count
504 instead of multiplying with D. */
505 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
507 /* If the suggested multiplier is more than SIZE bits, we can
508 do better for even divisors, using an initial right shift. */
509 if ((mh != 0 && (d & 1) == 0)
510 || (!has_vector_shift && pre_shift != -1))
512 if (has_vector_shift)
513 pre_shift = floor_log2 (d & -d);
514 else if (pre_shift == -1)
516 unsigned int j;
517 for (j = 0; j < nunits; j++)
519 tree cst2 = VECTOR_CST_ELT (op1, j);
520 unsigned HOST_WIDE_INT d2;
521 int this_pre_shift;
523 if (!tree_fits_uhwi_p (cst2))
524 return NULL_TREE;
525 d2 = tree_to_uhwi (cst2) & mask;
526 if (d2 == 0)
527 return NULL_TREE;
528 this_pre_shift = floor_log2 (d2 & -d2);
529 if (pre_shift == -1 || this_pre_shift < pre_shift)
530 pre_shift = this_pre_shift;
532 if (i != 0 && pre_shift != 0)
534 /* Restart. */
535 i = -1U;
536 mode = -1;
537 continue;
540 if (pre_shift != 0)
542 if ((d >> pre_shift) <= 1)
544 mode = -2;
545 continue;
547 mh = choose_multiplier (d >> pre_shift, prec,
548 prec - pre_shift,
549 &ml, &post_shift, &dummy_int);
550 gcc_assert (!mh);
551 pre_shifts[i] = pre_shift;
554 if (!mh)
555 this_mode = 0;
556 else
557 this_mode = 1;
559 else
561 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
562 unsigned HOST_WIDE_INT abs_d;
564 if (d == -1)
565 return NULL_TREE;
567 /* Since d might be INT_MIN, we have to cast to
568 unsigned HOST_WIDE_INT before negating to avoid
569 undefined signed overflow. */
570 abs_d = (d >= 0
571 ? (unsigned HOST_WIDE_INT) d
572 : - (unsigned HOST_WIDE_INT) d);
574 /* n rem d = n rem -d */
575 if (code == TRUNC_MOD_EXPR && d < 0)
576 d = abs_d;
577 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
579 /* This case is not handled correctly below. */
580 mode = -2;
581 continue;
583 if (abs_d <= 1)
585 mode = -2;
586 continue;
589 choose_multiplier (abs_d, prec, prec - 1, &ml,
590 &post_shift, &dummy_int);
591 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
593 this_mode = 4 + (d < 0);
594 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
596 else
597 this_mode = 2 + (d < 0);
599 mulc[i] = ml;
600 post_shifts[i] = post_shift;
601 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
602 || post_shift >= prec
603 || pre_shifts[i] >= prec)
604 this_mode = -2;
606 if (i == 0)
607 mode = this_mode;
608 else if (mode != this_mode)
609 mode = -2;
612 vec = XALLOCAVEC (tree, nunits);
614 if (use_pow2)
616 tree addend = NULL_TREE;
617 if (sign_p == SIGNED)
619 tree uns_type;
621 /* Both division and remainder sequences need
622 op0 < 0 ? mask : 0 computed. It can be either computed as
623 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
624 if none of the shifts is 0, or as the conditional. */
625 for (i = 0; i < nunits; i++)
626 if (shifts[i] == 0)
627 break;
628 uns_type
629 = build_vector_type (build_nonstandard_integer_type (prec, 1),
630 nunits);
631 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
633 for (i = 0; i < nunits; i++)
634 shift_temps[i] = prec - 1;
635 cur_op = add_rshift (gsi, type, op0, shift_temps);
636 if (cur_op != NULL_TREE)
638 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
639 uns_type, cur_op);
640 for (i = 0; i < nunits; i++)
641 shift_temps[i] = prec - shifts[i];
642 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
643 if (cur_op != NULL_TREE)
644 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
645 type, cur_op);
648 if (addend == NULL_TREE
649 && expand_vec_cond_expr_p (type, type))
651 tree zero, cst, cond;
652 gimple stmt;
654 zero = build_zero_cst (type);
655 cond = build2 (LT_EXPR, type, op0, zero);
656 for (i = 0; i < nunits; i++)
657 vec[i] = build_int_cst (TREE_TYPE (type),
658 ((unsigned HOST_WIDE_INT) 1
659 << shifts[i]) - 1);
660 cst = build_vector (type, vec);
661 addend = make_ssa_name (type);
662 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
663 cst, zero);
664 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
667 if (code == TRUNC_DIV_EXPR)
669 if (sign_p == UNSIGNED)
671 /* q = op0 >> shift; */
672 cur_op = add_rshift (gsi, type, op0, shifts);
673 if (cur_op != NULL_TREE)
674 return cur_op;
676 else if (addend != NULL_TREE)
678 /* t1 = op0 + addend;
679 q = t1 >> shift; */
680 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
681 if (op != unknown_optab
682 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
684 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
685 cur_op = add_rshift (gsi, type, cur_op, shifts);
686 if (cur_op != NULL_TREE)
687 return cur_op;
691 else
693 tree mask;
694 for (i = 0; i < nunits; i++)
695 vec[i] = build_int_cst (TREE_TYPE (type),
696 ((unsigned HOST_WIDE_INT) 1
697 << shifts[i]) - 1);
698 mask = build_vector (type, vec);
699 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
700 if (op != unknown_optab
701 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
703 if (sign_p == UNSIGNED)
704 /* r = op0 & mask; */
705 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
706 else if (addend != NULL_TREE)
708 /* t1 = op0 + addend;
709 t2 = t1 & mask;
710 r = t2 - addend; */
711 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
712 if (op != unknown_optab
713 && optab_handler (op, TYPE_MODE (type))
714 != CODE_FOR_nothing)
716 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
717 addend);
718 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
719 cur_op, mask);
720 op = optab_for_tree_code (MINUS_EXPR, type,
721 optab_default);
722 if (op != unknown_optab
723 && optab_handler (op, TYPE_MODE (type))
724 != CODE_FOR_nothing)
725 return gimplify_build2 (gsi, MINUS_EXPR, type,
726 cur_op, addend);
733 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
734 return NULL_TREE;
736 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
737 return NULL_TREE;
739 cur_op = op0;
741 switch (mode)
743 case 0:
744 gcc_assert (sign_p == UNSIGNED);
745 /* t1 = oprnd0 >> pre_shift;
746 t2 = t1 h* ml;
747 q = t2 >> post_shift; */
748 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
749 if (cur_op == NULL_TREE)
750 return NULL_TREE;
751 break;
752 case 1:
753 gcc_assert (sign_p == UNSIGNED);
754 for (i = 0; i < nunits; i++)
756 shift_temps[i] = 1;
757 post_shifts[i]--;
759 break;
760 case 2:
761 case 3:
762 case 4:
763 case 5:
764 gcc_assert (sign_p == SIGNED);
765 for (i = 0; i < nunits; i++)
766 shift_temps[i] = prec - 1;
767 break;
768 default:
769 return NULL_TREE;
772 for (i = 0; i < nunits; i++)
773 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
774 mulcst = build_vector (type, vec);
776 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
778 switch (mode)
780 case 0:
781 /* t1 = oprnd0 >> pre_shift;
782 t2 = t1 h* ml;
783 q = t2 >> post_shift; */
784 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
785 break;
786 case 1:
787 /* t1 = oprnd0 h* ml;
788 t2 = oprnd0 - t1;
789 t3 = t2 >> 1;
790 t4 = t1 + t3;
791 q = t4 >> (post_shift - 1); */
792 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
793 if (op == unknown_optab
794 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
795 return NULL_TREE;
796 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
797 tem = add_rshift (gsi, type, tem, shift_temps);
798 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
799 if (op == unknown_optab
800 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
801 return NULL_TREE;
802 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
803 cur_op = add_rshift (gsi, type, tem, post_shifts);
804 if (cur_op == NULL_TREE)
805 return NULL_TREE;
806 break;
807 case 2:
808 case 3:
809 case 4:
810 case 5:
811 /* t1 = oprnd0 h* ml;
812 t2 = t1; [ iff (mode & 2) != 0 ]
813 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
814 t3 = t2 >> post_shift;
815 t4 = oprnd0 >> (prec - 1);
816 q = t3 - t4; [ iff (mode & 1) == 0 ]
817 q = t4 - t3; [ iff (mode & 1) != 0 ] */
818 if ((mode & 2) == 0)
820 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
821 if (op == unknown_optab
822 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
823 return NULL_TREE;
824 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
826 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
827 if (cur_op == NULL_TREE)
828 return NULL_TREE;
829 tem = add_rshift (gsi, type, op0, shift_temps);
830 if (tem == NULL_TREE)
831 return NULL_TREE;
832 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
833 if (op == unknown_optab
834 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
835 return NULL_TREE;
836 if ((mode & 1) == 0)
837 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
838 else
839 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
840 break;
841 default:
842 gcc_unreachable ();
845 if (code == TRUNC_DIV_EXPR)
846 return cur_op;
848 /* We divided. Now finish by:
849 t1 = q * oprnd1;
850 r = oprnd0 - t1; */
851 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
852 if (op == unknown_optab
853 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
854 return NULL_TREE;
855 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
856 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
857 if (op == unknown_optab
858 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
859 return NULL_TREE;
860 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
863 /* Expand a vector condition to scalars, by using many conditions
864 on the vector's elements. */
865 static void
866 expand_vector_condition (gimple_stmt_iterator *gsi)
868 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
869 tree type = gimple_expr_type (stmt);
870 tree a = gimple_assign_rhs1 (stmt);
871 tree a1 = a;
872 tree a2;
873 bool a_is_comparison = false;
874 tree b = gimple_assign_rhs2 (stmt);
875 tree c = gimple_assign_rhs3 (stmt);
876 vec<constructor_elt, va_gc> *v;
877 tree constr;
878 tree inner_type = TREE_TYPE (type);
879 tree cond_type = TREE_TYPE (TREE_TYPE (a));
880 tree comp_inner_type = cond_type;
881 tree width = TYPE_SIZE (inner_type);
882 tree index = bitsize_int (0);
883 int nunits = TYPE_VECTOR_SUBPARTS (type);
884 int i;
885 location_t loc = gimple_location (gsi_stmt (*gsi));
887 if (!is_gimple_val (a))
889 gcc_assert (COMPARISON_CLASS_P (a));
890 a_is_comparison = true;
891 a1 = TREE_OPERAND (a, 0);
892 a2 = TREE_OPERAND (a, 1);
893 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
896 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
897 return;
899 /* TODO: try and find a smaller vector type. */
901 warning_at (loc, OPT_Wvector_operation_performance,
902 "vector condition will be expanded piecewise");
904 vec_alloc (v, nunits);
905 for (i = 0; i < nunits;
906 i++, index = int_const_binop (PLUS_EXPR, index, width))
908 tree aa, result;
909 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
910 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
911 if (a_is_comparison)
913 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
914 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
915 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
917 else
918 aa = tree_vec_extract (gsi, cond_type, a, width, index);
919 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
920 constructor_elt ce = {NULL_TREE, result};
921 v->quick_push (ce);
924 constr = build_constructor (type, v);
925 gimple_assign_set_rhs_from_tree (gsi, constr);
926 update_stmt (gsi_stmt (*gsi));
929 static tree
930 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
931 gassign *assign, enum tree_code code)
933 machine_mode compute_mode = TYPE_MODE (compute_type);
935 /* If the compute mode is not a vector mode (hence we are not decomposing
936 a BLKmode vector to smaller, hardware-supported vectors), we may want
937 to expand the operations in parallel. */
938 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
939 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
940 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
941 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
942 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
943 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
944 switch (code)
946 case PLUS_EXPR:
947 case MINUS_EXPR:
948 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
949 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
950 gimple_assign_rhs1 (assign),
951 gimple_assign_rhs2 (assign), code);
952 break;
954 case NEGATE_EXPR:
955 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
956 return expand_vector_addition (gsi, do_unop, do_negate, type,
957 gimple_assign_rhs1 (assign),
958 NULL_TREE, code);
959 break;
961 case BIT_AND_EXPR:
962 case BIT_IOR_EXPR:
963 case BIT_XOR_EXPR:
964 return expand_vector_parallel (gsi, do_binop, type,
965 gimple_assign_rhs1 (assign),
966 gimple_assign_rhs2 (assign), code);
968 case BIT_NOT_EXPR:
969 return expand_vector_parallel (gsi, do_unop, type,
970 gimple_assign_rhs1 (assign),
971 NULL_TREE, code);
972 case EQ_EXPR:
973 case NE_EXPR:
974 case GT_EXPR:
975 case LT_EXPR:
976 case GE_EXPR:
977 case LE_EXPR:
978 case UNEQ_EXPR:
979 case UNGT_EXPR:
980 case UNLT_EXPR:
981 case UNGE_EXPR:
982 case UNLE_EXPR:
983 case LTGT_EXPR:
984 case ORDERED_EXPR:
985 case UNORDERED_EXPR:
987 tree rhs1 = gimple_assign_rhs1 (assign);
988 tree rhs2 = gimple_assign_rhs2 (assign);
990 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
993 case TRUNC_DIV_EXPR:
994 case TRUNC_MOD_EXPR:
996 tree rhs1 = gimple_assign_rhs1 (assign);
997 tree rhs2 = gimple_assign_rhs2 (assign);
998 tree ret;
1000 if (!optimize
1001 || !VECTOR_INTEGER_TYPE_P (type)
1002 || TREE_CODE (rhs2) != VECTOR_CST
1003 || !VECTOR_MODE_P (TYPE_MODE (type)))
1004 break;
1006 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1007 if (ret != NULL_TREE)
1008 return ret;
1009 break;
1012 default:
1013 break;
1016 if (TREE_CODE_CLASS (code) == tcc_unary)
1017 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1018 gimple_assign_rhs1 (assign),
1019 NULL_TREE, code);
1020 else
1021 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1022 gimple_assign_rhs1 (assign),
1023 gimple_assign_rhs2 (assign), code);
1026 /* Try to optimize
1027 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1028 style stmts into:
1029 _9 = { b_7, b_7, b_7, b_7 };
1030 a_5 = _9 + { 0, 3, 6, 9 };
1031 because vector splat operation is usually more efficient
1032 than piecewise initialization of the vector. */
1034 static void
1035 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1037 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1038 tree lhs = gimple_assign_lhs (stmt);
1039 tree rhs = gimple_assign_rhs1 (stmt);
1040 tree type = TREE_TYPE (rhs);
1041 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1042 bool all_same = true;
1043 constructor_elt *elt;
1044 tree *cst;
1045 gimple g;
1046 tree base = NULL_TREE;
1047 optab op;
1049 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1050 return;
1051 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1052 if (op == unknown_optab
1053 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1054 return;
1055 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1056 if (TREE_CODE (elt->value) != SSA_NAME
1057 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1058 return;
1059 else
1061 tree this_base = elt->value;
1062 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1063 all_same = false;
1064 for (j = 0; j < nelts + 1; j++)
1066 g = SSA_NAME_DEF_STMT (this_base);
1067 if (is_gimple_assign (g)
1068 && gimple_assign_rhs_code (g) == PLUS_EXPR
1069 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1070 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1071 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1072 this_base = gimple_assign_rhs1 (g);
1073 else
1074 break;
1076 if (i == 0)
1077 base = this_base;
1078 else if (this_base != base)
1079 return;
1081 if (all_same)
1082 return;
1083 cst = XALLOCAVEC (tree, nelts);
1084 for (i = 0; i < nelts; i++)
1086 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1087 cst[i] = build_zero_cst (TREE_TYPE (base));
1088 while (this_base != base)
1090 g = SSA_NAME_DEF_STMT (this_base);
1091 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1092 cst[i], gimple_assign_rhs2 (g));
1093 if (cst[i] == NULL_TREE
1094 || TREE_CODE (cst[i]) != INTEGER_CST
1095 || TREE_OVERFLOW (cst[i]))
1096 return;
1097 this_base = gimple_assign_rhs1 (g);
1100 for (i = 0; i < nelts; i++)
1101 CONSTRUCTOR_ELT (rhs, i)->value = base;
1102 g = gimple_build_assign (make_ssa_name (type), rhs);
1103 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1104 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1105 build_vector (type, cst));
1106 gsi_replace (gsi, g, false);
1109 /* Return a type for the widest vector mode whose components are of type
1110 TYPE, or NULL_TREE if none is found. */
1112 static tree
1113 type_for_widest_vector_mode (tree type, optab op)
1115 machine_mode inner_mode = TYPE_MODE (type);
1116 machine_mode best_mode = VOIDmode, mode;
1117 int best_nunits = 0;
1119 if (SCALAR_FLOAT_MODE_P (inner_mode))
1120 mode = MIN_MODE_VECTOR_FLOAT;
1121 else if (SCALAR_FRACT_MODE_P (inner_mode))
1122 mode = MIN_MODE_VECTOR_FRACT;
1123 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1124 mode = MIN_MODE_VECTOR_UFRACT;
1125 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1126 mode = MIN_MODE_VECTOR_ACCUM;
1127 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1128 mode = MIN_MODE_VECTOR_UACCUM;
1129 else
1130 mode = MIN_MODE_VECTOR_INT;
1132 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1133 if (GET_MODE_INNER (mode) == inner_mode
1134 && GET_MODE_NUNITS (mode) > best_nunits
1135 && optab_handler (op, mode) != CODE_FOR_nothing)
1136 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1138 if (best_mode == VOIDmode)
1139 return NULL_TREE;
1140 else
1141 return build_vector_type_for_mode (type, best_mode);
1145 /* Build a reference to the element of the vector VECT. Function
1146 returns either the element itself, either BIT_FIELD_REF, or an
1147 ARRAY_REF expression.
1149 GSI is required to insert temporary variables while building a
1150 refernece to the element of the vector VECT.
1152 PTMPVEC is a pointer to the temporary variable for caching
1153 purposes. In case when PTMPVEC is NULL new temporary variable
1154 will be created. */
1155 static tree
1156 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1158 tree vect_type, vect_elt_type;
1159 gimple asgn;
1160 tree tmpvec;
1161 tree arraytype;
1162 bool need_asgn = true;
1163 unsigned int elements;
1165 vect_type = TREE_TYPE (vect);
1166 vect_elt_type = TREE_TYPE (vect_type);
1167 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1169 if (TREE_CODE (idx) == INTEGER_CST)
1171 unsigned HOST_WIDE_INT index;
1173 /* Given that we're about to compute a binary modulus,
1174 we don't care about the high bits of the value. */
1175 index = TREE_INT_CST_LOW (idx);
1176 if (!tree_fits_uhwi_p (idx) || index >= elements)
1178 index &= elements - 1;
1179 idx = build_int_cst (TREE_TYPE (idx), index);
1182 /* When lowering a vector statement sequence do some easy
1183 simplification by looking through intermediate vector results. */
1184 if (TREE_CODE (vect) == SSA_NAME)
1186 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1187 if (is_gimple_assign (def_stmt)
1188 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1189 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1190 vect = gimple_assign_rhs1 (def_stmt);
1193 if (TREE_CODE (vect) == VECTOR_CST)
1194 return VECTOR_CST_ELT (vect, index);
1195 else if (TREE_CODE (vect) == CONSTRUCTOR
1196 && (CONSTRUCTOR_NELTS (vect) == 0
1197 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1198 != VECTOR_TYPE))
1200 if (index < CONSTRUCTOR_NELTS (vect))
1201 return CONSTRUCTOR_ELT (vect, index)->value;
1202 return build_zero_cst (vect_elt_type);
1204 else
1206 tree size = TYPE_SIZE (vect_elt_type);
1207 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1208 size);
1209 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1213 if (!ptmpvec)
1214 tmpvec = create_tmp_var (vect_type, "vectmp");
1215 else if (!*ptmpvec)
1216 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1217 else
1219 tmpvec = *ptmpvec;
1220 need_asgn = false;
1223 if (need_asgn)
1225 TREE_ADDRESSABLE (tmpvec) = 1;
1226 asgn = gimple_build_assign (tmpvec, vect);
1227 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1230 arraytype = build_array_type_nelts (vect_elt_type, elements);
1231 return build4 (ARRAY_REF, vect_elt_type,
1232 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1233 idx, NULL_TREE, NULL_TREE);
1236 /* Check if VEC_PERM_EXPR within the given setting is supported
1237 by hardware, or lower it piecewise.
1239 When VEC_PERM_EXPR has the same first and second operands:
1240 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1241 {v0[mask[0]], v0[mask[1]], ...}
1242 MASK and V0 must have the same number of elements.
1244 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1245 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1246 V0 and V1 must have the same type. MASK, V0, V1 must have the
1247 same number of arguments. */
1249 static void
1250 lower_vec_perm (gimple_stmt_iterator *gsi)
1252 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1253 tree mask = gimple_assign_rhs3 (stmt);
1254 tree vec0 = gimple_assign_rhs1 (stmt);
1255 tree vec1 = gimple_assign_rhs2 (stmt);
1256 tree vect_type = TREE_TYPE (vec0);
1257 tree mask_type = TREE_TYPE (mask);
1258 tree vect_elt_type = TREE_TYPE (vect_type);
1259 tree mask_elt_type = TREE_TYPE (mask_type);
1260 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1261 vec<constructor_elt, va_gc> *v;
1262 tree constr, t, si, i_val;
1263 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1264 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1265 location_t loc = gimple_location (gsi_stmt (*gsi));
1266 unsigned i;
1268 if (TREE_CODE (mask) == SSA_NAME)
1270 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1271 if (is_gimple_assign (def_stmt)
1272 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1273 mask = gimple_assign_rhs1 (def_stmt);
1276 if (TREE_CODE (mask) == VECTOR_CST)
1278 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1280 for (i = 0; i < elements; ++i)
1281 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1282 & (2 * elements - 1));
1284 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1286 gimple_assign_set_rhs3 (stmt, mask);
1287 update_stmt (stmt);
1288 return;
1290 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1291 vector as VEC1 and a right element shift MASK. */
1292 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1293 != CODE_FOR_nothing
1294 && TREE_CODE (vec1) == VECTOR_CST
1295 && initializer_zerop (vec1)
1296 && sel_int[0]
1297 && sel_int[0] < elements)
1299 for (i = 1; i < elements; ++i)
1301 unsigned int expected = i + sel_int[0];
1302 /* Indices into the second vector are all equivalent. */
1303 if (MIN (elements, (unsigned) sel_int[i])
1304 != MIN (elements, expected))
1305 break;
1307 if (i == elements)
1309 gimple_assign_set_rhs3 (stmt, mask);
1310 update_stmt (stmt);
1311 return;
1315 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1316 return;
1318 warning_at (loc, OPT_Wvector_operation_performance,
1319 "vector shuffling operation will be expanded piecewise");
1321 vec_alloc (v, elements);
1322 for (i = 0; i < elements; i++)
1324 si = size_int (i);
1325 i_val = vector_element (gsi, mask, si, &masktmp);
1327 if (TREE_CODE (i_val) == INTEGER_CST)
1329 unsigned HOST_WIDE_INT index;
1331 index = TREE_INT_CST_LOW (i_val);
1332 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1333 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1335 if (two_operand_p && (index & elements) != 0)
1336 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1337 else
1338 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1340 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1341 true, GSI_SAME_STMT);
1343 else
1345 tree cond = NULL_TREE, v0_val;
1347 if (two_operand_p)
1349 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1350 build_int_cst (mask_elt_type, elements));
1351 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1352 true, GSI_SAME_STMT);
1355 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1356 build_int_cst (mask_elt_type, elements - 1));
1357 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1358 true, GSI_SAME_STMT);
1360 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1361 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1362 true, GSI_SAME_STMT);
1364 if (two_operand_p)
1366 tree v1_val;
1368 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1369 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1370 true, GSI_SAME_STMT);
1372 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1373 cond, build_zero_cst (mask_elt_type));
1374 cond = fold_build3 (COND_EXPR, vect_elt_type,
1375 cond, v0_val, v1_val);
1376 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1377 true, GSI_SAME_STMT);
1379 else
1380 t = v0_val;
1383 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1386 constr = build_constructor (vect_type, v);
1387 gimple_assign_set_rhs_from_tree (gsi, constr);
1388 update_stmt (gsi_stmt (*gsi));
1391 /* Return type in which CODE operation with optab OP can be
1392 computed. */
1394 static tree
1395 get_compute_type (enum tree_code code, optab op, tree type)
1397 /* For very wide vectors, try using a smaller vector mode. */
1398 tree compute_type = type;
1399 if (op
1400 && (!VECTOR_MODE_P (TYPE_MODE (type))
1401 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1403 tree vector_compute_type
1404 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1405 if (vector_compute_type != NULL_TREE
1406 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1407 < TYPE_VECTOR_SUBPARTS (compute_type))
1408 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1409 != CODE_FOR_nothing))
1410 compute_type = vector_compute_type;
1413 /* If we are breaking a BLKmode vector into smaller pieces,
1414 type_for_widest_vector_mode has already looked into the optab,
1415 so skip these checks. */
1416 if (compute_type == type)
1418 machine_mode compute_mode = TYPE_MODE (compute_type);
1419 if (VECTOR_MODE_P (compute_mode))
1421 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1422 return compute_type;
1423 if (code == MULT_HIGHPART_EXPR
1424 && can_mult_highpart_p (compute_mode,
1425 TYPE_UNSIGNED (compute_type)))
1426 return compute_type;
1428 /* There is no operation in hardware, so fall back to scalars. */
1429 compute_type = TREE_TYPE (type);
1432 return compute_type;
1435 /* Helper function of expand_vector_operations_1. Return number of
1436 vector elements for vector types or 1 for other types. */
1438 static inline int
1439 count_type_subparts (tree type)
1441 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1444 static tree
1445 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1446 tree bitpos, tree bitsize, enum tree_code code)
1448 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1449 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1450 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1451 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1452 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1453 return gimplify_build3 (gsi, code, inner_type, cond, a, b);
1456 /* Expand a vector COND_EXPR to scalars, piecewise. */
1457 static void
1458 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1460 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1461 tree type = gimple_expr_type (stmt);
1462 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1463 machine_mode compute_mode = TYPE_MODE (compute_type);
1464 gcc_assert (compute_mode != BLKmode);
1465 tree lhs = gimple_assign_lhs (stmt);
1466 tree rhs2 = gimple_assign_rhs2 (stmt);
1467 tree rhs3 = gimple_assign_rhs3 (stmt);
1468 tree new_rhs;
1470 /* If the compute mode is not a vector mode (hence we are not decomposing
1471 a BLKmode vector to smaller, hardware-supported vectors), we may want
1472 to expand the operations in parallel. */
1473 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1474 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1475 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1476 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1477 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1478 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1479 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1480 COND_EXPR);
1481 else
1482 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1483 rhs2, rhs3, COND_EXPR);
1484 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1485 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1486 new_rhs);
1488 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1489 way to do it is change expand_vector_operation and its callees to
1490 return a tree_code, RHS1 and RHS2 instead of a tree. */
1491 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1492 update_stmt (gsi_stmt (*gsi));
1495 /* Process one statement. If we identify a vector operation, expand it. */
1497 static void
1498 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1500 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1501 enum tree_code code;
1502 optab op = unknown_optab;
1503 enum gimple_rhs_class rhs_class;
1504 tree new_rhs;
1506 /* Only consider code == GIMPLE_ASSIGN. */
1507 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1508 if (!stmt)
1509 return;
1511 code = gimple_assign_rhs_code (stmt);
1512 rhs_class = get_gimple_rhs_class (code);
1513 lhs = gimple_assign_lhs (stmt);
1515 if (code == VEC_PERM_EXPR)
1517 lower_vec_perm (gsi);
1518 return;
1521 if (code == VEC_COND_EXPR)
1523 expand_vector_condition (gsi);
1524 return;
1527 if (code == COND_EXPR
1528 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1529 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1531 expand_vector_scalar_condition (gsi);
1532 return;
1535 if (code == CONSTRUCTOR
1536 && TREE_CODE (lhs) == SSA_NAME
1537 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1538 && !gimple_clobber_p (stmt)
1539 && optimize)
1541 optimize_vector_constructor (gsi);
1542 return;
1545 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1546 return;
1548 rhs1 = gimple_assign_rhs1 (stmt);
1549 type = gimple_expr_type (stmt);
1550 if (rhs_class == GIMPLE_BINARY_RHS)
1551 rhs2 = gimple_assign_rhs2 (stmt);
1553 if (TREE_CODE (type) != VECTOR_TYPE)
1554 return;
1556 if (CONVERT_EXPR_CODE_P (code)
1557 || code == FLOAT_EXPR
1558 || code == FIX_TRUNC_EXPR
1559 || code == VIEW_CONVERT_EXPR)
1560 return;
1562 /* The signedness is determined from input argument. */
1563 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1564 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1565 type = TREE_TYPE (rhs1);
1567 /* For widening/narrowing vector operations, the relevant type is of the
1568 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1569 calculated in the same way above. */
1570 if (code == WIDEN_SUM_EXPR
1571 || code == VEC_WIDEN_MULT_HI_EXPR
1572 || code == VEC_WIDEN_MULT_LO_EXPR
1573 || code == VEC_WIDEN_MULT_EVEN_EXPR
1574 || code == VEC_WIDEN_MULT_ODD_EXPR
1575 || code == VEC_UNPACK_HI_EXPR
1576 || code == VEC_UNPACK_LO_EXPR
1577 || code == VEC_PACK_TRUNC_EXPR
1578 || code == VEC_PACK_SAT_EXPR
1579 || code == VEC_PACK_FIX_TRUNC_EXPR
1580 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1581 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1582 type = TREE_TYPE (rhs1);
1584 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1585 scalar */
1586 if (code == LSHIFT_EXPR
1587 || code == RSHIFT_EXPR
1588 || code == LROTATE_EXPR
1589 || code == RROTATE_EXPR)
1591 optab opv;
1593 /* Check whether we have vector <op> {x,x,x,x} where x
1594 could be a scalar variable or a constant. Transform
1595 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1596 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1598 tree first;
1599 gimple def_stmt;
1601 if ((TREE_CODE (rhs2) == VECTOR_CST
1602 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1603 || (TREE_CODE (rhs2) == SSA_NAME
1604 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1605 && gimple_assign_single_p (def_stmt)
1606 && (first = uniform_vector_p
1607 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1609 gimple_assign_set_rhs2 (stmt, first);
1610 update_stmt (stmt);
1611 rhs2 = first;
1615 opv = optab_for_tree_code (code, type, optab_vector);
1616 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1617 op = opv;
1618 else
1620 op = optab_for_tree_code (code, type, optab_scalar);
1622 compute_type = get_compute_type (code, op, type);
1623 if (compute_type == type)
1624 return;
1625 /* The rtl expander will expand vector/scalar as vector/vector
1626 if necessary. Pick one with wider vector type. */
1627 tree compute_vtype = get_compute_type (code, opv, type);
1628 if (count_type_subparts (compute_vtype)
1629 > count_type_subparts (compute_type))
1631 compute_type = compute_vtype;
1632 op = opv;
1636 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1638 if (compute_type == NULL_TREE)
1639 compute_type = get_compute_type (code, op, type);
1640 if (compute_type == type)
1641 return;
1642 /* Before splitting vector rotates into scalar rotates,
1643 see if we can't use vector shifts and BIT_IOR_EXPR
1644 instead. For vector by vector rotates we'd also
1645 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1646 for now, fold doesn't seem to create such rotates anyway. */
1647 if (compute_type == TREE_TYPE (type)
1648 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1650 optab oplv = vashl_optab, opl = ashl_optab;
1651 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1652 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1653 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1654 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1655 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1656 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1657 /* The rtl expander will expand vector/scalar as vector/vector
1658 if necessary. Pick one with wider vector type. */
1659 if (count_type_subparts (compute_lvtype)
1660 > count_type_subparts (compute_ltype))
1662 compute_ltype = compute_lvtype;
1663 opl = oplv;
1665 if (count_type_subparts (compute_rvtype)
1666 > count_type_subparts (compute_rtype))
1668 compute_rtype = compute_rvtype;
1669 opr = oprv;
1671 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1672 BIT_IOR_EXPR. */
1673 compute_type = compute_ltype;
1674 if (count_type_subparts (compute_type)
1675 > count_type_subparts (compute_rtype))
1676 compute_type = compute_rtype;
1677 if (count_type_subparts (compute_type)
1678 > count_type_subparts (compute_otype))
1679 compute_type = compute_otype;
1680 /* Verify all 3 operations can be performed in that type. */
1681 if (compute_type != TREE_TYPE (type))
1683 if (optab_handler (opl, TYPE_MODE (compute_type))
1684 == CODE_FOR_nothing
1685 || optab_handler (opr, TYPE_MODE (compute_type))
1686 == CODE_FOR_nothing
1687 || optab_handler (opo, TYPE_MODE (compute_type))
1688 == CODE_FOR_nothing)
1689 compute_type = TREE_TYPE (type);
1694 else
1695 op = optab_for_tree_code (code, type, optab_default);
1697 /* Optabs will try converting a negation into a subtraction, so
1698 look for it as well. TODO: negation of floating-point vectors
1699 might be turned into an exclusive OR toggling the sign bit. */
1700 if (op == unknown_optab
1701 && code == NEGATE_EXPR
1702 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1703 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1705 if (compute_type == NULL_TREE)
1706 compute_type = get_compute_type (code, op, type);
1707 if (compute_type == type)
1708 return;
1710 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1712 /* Leave expression untouched for later expansion. */
1713 if (new_rhs == NULL_TREE)
1714 return;
1716 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1717 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1718 new_rhs);
1720 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1721 way to do it is change expand_vector_operation and its callees to
1722 return a tree_code, RHS1 and RHS2 instead of a tree. */
1723 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1724 update_stmt (gsi_stmt (*gsi));
1727 /* Use this to lower vector operations introduced by the vectorizer,
1728 if it may need the bit-twiddling tricks implemented in this file. */
1730 static unsigned int
1731 expand_vector_operations (void)
1733 gimple_stmt_iterator gsi;
1734 basic_block bb;
1735 bool cfg_changed = false;
1737 FOR_EACH_BB_FN (bb, cfun)
1739 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1741 expand_vector_operations_1 (&gsi);
1742 /* ??? If we do not cleanup EH then we will ICE in
1743 verification. But in reality we have created wrong-code
1744 as we did not properly transition EH info and edges to
1745 the piecewise computations. */
1746 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1747 && gimple_purge_dead_eh_edges (bb))
1748 cfg_changed = true;
1752 return cfg_changed ? TODO_cleanup_cfg : 0;
1755 namespace {
1757 const pass_data pass_data_lower_vector =
1759 GIMPLE_PASS, /* type */
1760 "veclower", /* name */
1761 OPTGROUP_VEC, /* optinfo_flags */
1762 TV_NONE, /* tv_id */
1763 PROP_cfg, /* properties_required */
1764 PROP_gimple_lvec, /* properties_provided */
1765 0, /* properties_destroyed */
1766 0, /* todo_flags_start */
1767 TODO_update_ssa, /* todo_flags_finish */
1770 class pass_lower_vector : public gimple_opt_pass
1772 public:
1773 pass_lower_vector (gcc::context *ctxt)
1774 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1777 /* opt_pass methods: */
1778 virtual bool gate (function *fun)
1780 return !(fun->curr_properties & PROP_gimple_lvec);
1783 virtual unsigned int execute (function *)
1785 return expand_vector_operations ();
1788 }; // class pass_lower_vector
1790 } // anon namespace
1792 gimple_opt_pass *
1793 make_pass_lower_vector (gcc::context *ctxt)
1795 return new pass_lower_vector (ctxt);
1798 namespace {
1800 const pass_data pass_data_lower_vector_ssa =
1802 GIMPLE_PASS, /* type */
1803 "veclower2", /* name */
1804 OPTGROUP_VEC, /* optinfo_flags */
1805 TV_NONE, /* tv_id */
1806 PROP_cfg, /* properties_required */
1807 PROP_gimple_lvec, /* properties_provided */
1808 0, /* properties_destroyed */
1809 0, /* todo_flags_start */
1810 ( TODO_update_ssa
1811 | TODO_cleanup_cfg ), /* todo_flags_finish */
1814 class pass_lower_vector_ssa : public gimple_opt_pass
1816 public:
1817 pass_lower_vector_ssa (gcc::context *ctxt)
1818 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1821 /* opt_pass methods: */
1822 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1823 virtual unsigned int execute (function *)
1825 return expand_vector_operations ();
1828 }; // class pass_lower_vector_ssa
1830 } // anon namespace
1832 gimple_opt_pass *
1833 make_pass_lower_vector_ssa (gcc::context *ctxt)
1835 return new pass_lower_vector_ssa (ctxt);
1838 #include "gt-tree-vect-generic.h"