2015-03-21 Sandra Loosemore <sandra@codesourcery.com>
[official-gcc.git] / gcc / tree-vect-generic.c
bloba88b22f659da95a24848c7254b4bb68646f141fc
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "hash-set.h"
24 #include "machmode.h"
25 #include "vec.h"
26 #include "double-int.h"
27 #include "input.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "options.h"
31 #include "wide-int.h"
32 #include "inchash.h"
33 #include "tree.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "tm.h"
37 #include "langhooks.h"
38 #include "predict.h"
39 #include "hard-reg-set.h"
40 #include "function.h"
41 #include "dominance.h"
42 #include "cfg.h"
43 #include "basic-block.h"
44 #include "tree-ssa-alias.h"
45 #include "internal-fn.h"
46 #include "tree-eh.h"
47 #include "gimple-expr.h"
48 #include "is-a.h"
49 #include "gimple.h"
50 #include "gimple-iterator.h"
51 #include "gimplify-me.h"
52 #include "gimple-ssa.h"
53 #include "tree-cfg.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-iterator.h"
57 #include "tree-pass.h"
58 #include "flags.h"
59 #include "diagnostic.h"
60 #include "target.h"
62 /* Need to include rtl.h, expr.h, etc. for optabs. */
63 #include "hashtab.h"
64 #include "rtl.h"
65 #include "statistics.h"
66 #include "real.h"
67 #include "fixed-value.h"
68 #include "insn-config.h"
69 #include "expmed.h"
70 #include "dojump.h"
71 #include "explow.h"
72 #include "calls.h"
73 #include "emit-rtl.h"
74 #include "varasm.h"
75 #include "stmt.h"
76 #include "expr.h"
77 #include "insn-codes.h"
78 #include "optabs.h"
81 static void expand_vector_operations_1 (gimple_stmt_iterator *);
84 /* Build a constant of type TYPE, made of VALUE's bits replicated
85 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
86 static tree
87 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
89 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
90 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
91 / HOST_BITS_PER_WIDE_INT;
92 unsigned HOST_WIDE_INT low, mask;
93 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
94 int i;
96 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
98 if (width == HOST_BITS_PER_WIDE_INT)
99 low = value;
100 else
102 mask = ((HOST_WIDE_INT)1 << width) - 1;
103 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
106 for (i = 0; i < n; i++)
107 a[i] = low;
109 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
110 return wide_int_to_tree
111 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
114 static GTY(()) tree vector_inner_type;
115 static GTY(()) tree vector_last_type;
116 static GTY(()) int vector_last_nunits;
118 /* Return a suitable vector types made of SUBPARTS units each of mode
119 "word_mode" (the global variable). */
120 static tree
121 build_word_mode_vector_type (int nunits)
123 if (!vector_inner_type)
124 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
125 else if (vector_last_nunits == nunits)
127 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
128 return vector_last_type;
131 /* We build a new type, but we canonicalize it nevertheless,
132 because it still saves some memory. */
133 vector_last_nunits = nunits;
134 vector_last_type = type_hash_canon (nunits,
135 build_vector_type (vector_inner_type,
136 nunits));
137 return vector_last_type;
140 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
141 tree, tree, tree, tree, tree, enum tree_code);
143 static inline tree
144 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
145 tree t, tree bitsize, tree bitpos)
147 if (bitpos)
148 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
149 else
150 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
153 static tree
154 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
155 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
156 enum tree_code code)
158 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
159 return gimplify_build1 (gsi, code, inner_type, a);
162 static tree
163 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
164 tree bitpos, tree bitsize, enum tree_code code)
166 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
167 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
168 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
169 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
170 return gimplify_build2 (gsi, code, inner_type, a, b);
173 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
175 INNER_TYPE is the type of A and B elements
177 returned expression is of signed integer type with the
178 size equal to the size of INNER_TYPE. */
179 static tree
180 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
181 tree bitpos, tree bitsize, enum tree_code code)
183 tree comp_type;
185 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
186 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
188 comp_type = build_nonstandard_integer_type
189 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
191 return gimplify_build3 (gsi, COND_EXPR, comp_type,
192 fold_build2 (code, boolean_type_node, a, b),
193 build_int_cst (comp_type, -1),
194 build_int_cst (comp_type, 0));
197 /* Expand vector addition to scalars. This does bit twiddling
198 in order to increase parallelism:
200 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
201 (a ^ b) & 0x80808080
203 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
204 (a ^ ~b) & 0x80808080
206 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
208 This optimization should be done only if 4 vector items or more
209 fit into a word. */
210 static tree
211 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
212 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
213 enum tree_code code)
215 tree inner_type = TREE_TYPE (TREE_TYPE (a));
216 unsigned HOST_WIDE_INT max;
217 tree low_bits, high_bits, a_low, b_low, result_low, signs;
219 max = GET_MODE_MASK (TYPE_MODE (inner_type));
220 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
221 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
223 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
224 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
226 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
227 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
228 if (code == PLUS_EXPR)
229 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
230 else
232 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
233 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
236 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
237 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
238 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
241 static tree
242 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
243 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
244 tree bitsize ATTRIBUTE_UNUSED,
245 enum tree_code code ATTRIBUTE_UNUSED)
247 tree inner_type = TREE_TYPE (TREE_TYPE (b));
248 HOST_WIDE_INT max;
249 tree low_bits, high_bits, b_low, result_low, signs;
251 max = GET_MODE_MASK (TYPE_MODE (inner_type));
252 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
253 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
255 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
257 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
258 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
259 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
260 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
261 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
264 /* Expand a vector operation to scalars, by using many operations
265 whose type is the vector type's inner type. */
266 static tree
267 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
268 tree type, tree inner_type,
269 tree a, tree b, enum tree_code code)
271 vec<constructor_elt, va_gc> *v;
272 tree part_width = TYPE_SIZE (inner_type);
273 tree index = bitsize_int (0);
274 int nunits = TYPE_VECTOR_SUBPARTS (type);
275 int delta = tree_to_uhwi (part_width)
276 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
277 int i;
278 location_t loc = gimple_location (gsi_stmt (*gsi));
280 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
281 warning_at (loc, OPT_Wvector_operation_performance,
282 "vector operation will be expanded piecewise");
283 else
284 warning_at (loc, OPT_Wvector_operation_performance,
285 "vector operation will be expanded in parallel");
287 vec_alloc (v, (nunits + delta - 1) / delta);
288 for (i = 0; i < nunits;
289 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
291 tree result = f (gsi, inner_type, a, b, index, part_width, code);
292 constructor_elt ce = {NULL_TREE, result};
293 v->quick_push (ce);
296 return build_constructor (type, v);
299 /* Expand a vector operation to scalars with the freedom to use
300 a scalar integer type, or to use a different size for the items
301 in the vector type. */
302 static tree
303 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
304 tree a, tree b,
305 enum tree_code code)
307 tree result, compute_type;
308 machine_mode mode;
309 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
310 location_t loc = gimple_location (gsi_stmt (*gsi));
312 /* We have three strategies. If the type is already correct, just do
313 the operation an element at a time. Else, if the vector is wider than
314 one word, do it a word at a time; finally, if the vector is smaller
315 than one word, do it as a scalar. */
316 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
317 return expand_vector_piecewise (gsi, f,
318 type, TREE_TYPE (type),
319 a, b, code);
320 else if (n_words > 1)
322 tree word_type = build_word_mode_vector_type (n_words);
323 result = expand_vector_piecewise (gsi, f,
324 word_type, TREE_TYPE (word_type),
325 a, b, code);
326 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
327 GSI_SAME_STMT);
329 else
331 /* Use a single scalar operation with a mode no wider than word_mode. */
332 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
333 compute_type = lang_hooks.types.type_for_mode (mode, 1);
334 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
335 warning_at (loc, OPT_Wvector_operation_performance,
336 "vector operation will be expanded with a "
337 "single scalar operation");
340 return result;
343 /* Expand a vector operation to scalars; for integer types we can use
344 special bit twiddling tricks to do the sums a word at a time, using
345 function F_PARALLEL instead of F. These tricks are done only if
346 they can process at least four items, that is, only if the vector
347 holds at least four items and if a word can hold four items. */
348 static tree
349 expand_vector_addition (gimple_stmt_iterator *gsi,
350 elem_op_func f, elem_op_func f_parallel,
351 tree type, tree a, tree b, enum tree_code code)
353 int parts_per_word = UNITS_PER_WORD
354 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
356 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
357 && parts_per_word >= 4
358 && TYPE_VECTOR_SUBPARTS (type) >= 4)
359 return expand_vector_parallel (gsi, f_parallel,
360 type, a, b, code);
361 else
362 return expand_vector_piecewise (gsi, f,
363 type, TREE_TYPE (type),
364 a, b, code);
367 /* Try to expand vector comparison expression OP0 CODE OP1 by
368 querying optab if the following expression:
369 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
370 can be expanded. */
371 static tree
372 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
373 tree op1, enum tree_code code)
375 tree t;
376 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
377 t = expand_vector_piecewise (gsi, do_compare, type,
378 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
379 else
380 t = NULL_TREE;
382 return t;
385 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
386 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
387 the result if successful, otherwise return NULL_TREE. */
388 static tree
389 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
391 optab op;
392 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
393 bool scalar_shift = true;
395 for (i = 1; i < nunits; i++)
397 if (shiftcnts[i] != shiftcnts[0])
398 scalar_shift = false;
401 if (scalar_shift && shiftcnts[0] == 0)
402 return op0;
404 if (scalar_shift)
406 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
407 if (op != unknown_optab
408 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
409 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
410 build_int_cst (NULL_TREE, shiftcnts[0]));
413 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
414 if (op != unknown_optab
415 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
417 tree *vec = XALLOCAVEC (tree, nunits);
418 for (i = 0; i < nunits; i++)
419 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
420 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
421 build_vector (type, vec));
424 return NULL_TREE;
427 /* Try to expand integer vector division by constant using
428 widening multiply, shifts and additions. */
429 static tree
430 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
431 tree op1, enum tree_code code)
433 bool use_pow2 = true;
434 bool has_vector_shift = true;
435 int mode = -1, this_mode;
436 int pre_shift = -1, post_shift;
437 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
438 int *shifts = XALLOCAVEC (int, nunits * 4);
439 int *pre_shifts = shifts + nunits;
440 int *post_shifts = pre_shifts + nunits;
441 int *shift_temps = post_shifts + nunits;
442 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
443 int prec = TYPE_PRECISION (TREE_TYPE (type));
444 int dummy_int;
445 unsigned int i;
446 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
447 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
448 tree *vec;
449 tree cur_op, mulcst, tem;
450 optab op;
452 if (prec > HOST_BITS_PER_WIDE_INT)
453 return NULL_TREE;
455 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
456 if (op == unknown_optab
457 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
458 has_vector_shift = false;
460 /* Analysis phase. Determine if all op1 elements are either power
461 of two and it is possible to expand it using shifts (or for remainder
462 using masking). Additionally compute the multiplicative constants
463 and pre and post shifts if the division is to be expanded using
464 widening or high part multiplication plus shifts. */
465 for (i = 0; i < nunits; i++)
467 tree cst = VECTOR_CST_ELT (op1, i);
468 unsigned HOST_WIDE_INT ml;
470 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
471 return NULL_TREE;
472 pre_shifts[i] = 0;
473 post_shifts[i] = 0;
474 mulc[i] = 0;
475 if (use_pow2
476 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
477 use_pow2 = false;
478 if (use_pow2)
480 shifts[i] = tree_log2 (cst);
481 if (shifts[i] != shifts[0]
482 && code == TRUNC_DIV_EXPR
483 && !has_vector_shift)
484 use_pow2 = false;
486 if (mode == -2)
487 continue;
488 if (sign_p == UNSIGNED)
490 unsigned HOST_WIDE_INT mh;
491 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
493 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
494 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
495 return NULL_TREE;
497 if (d <= 1)
499 mode = -2;
500 continue;
503 /* Find a suitable multiplier and right shift count
504 instead of multiplying with D. */
505 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
507 /* If the suggested multiplier is more than SIZE bits, we can
508 do better for even divisors, using an initial right shift. */
509 if ((mh != 0 && (d & 1) == 0)
510 || (!has_vector_shift && pre_shift != -1))
512 if (has_vector_shift)
513 pre_shift = floor_log2 (d & -d);
514 else if (pre_shift == -1)
516 unsigned int j;
517 for (j = 0; j < nunits; j++)
519 tree cst2 = VECTOR_CST_ELT (op1, j);
520 unsigned HOST_WIDE_INT d2;
521 int this_pre_shift;
523 if (!tree_fits_uhwi_p (cst2))
524 return NULL_TREE;
525 d2 = tree_to_uhwi (cst2) & mask;
526 if (d2 == 0)
527 return NULL_TREE;
528 this_pre_shift = floor_log2 (d2 & -d2);
529 if (pre_shift == -1 || this_pre_shift < pre_shift)
530 pre_shift = this_pre_shift;
532 if (i != 0 && pre_shift != 0)
534 /* Restart. */
535 i = -1U;
536 mode = -1;
537 continue;
540 if (pre_shift != 0)
542 if ((d >> pre_shift) <= 1)
544 mode = -2;
545 continue;
547 mh = choose_multiplier (d >> pre_shift, prec,
548 prec - pre_shift,
549 &ml, &post_shift, &dummy_int);
550 gcc_assert (!mh);
551 pre_shifts[i] = pre_shift;
554 if (!mh)
555 this_mode = 0;
556 else
557 this_mode = 1;
559 else
561 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
562 unsigned HOST_WIDE_INT abs_d;
564 if (d == -1)
565 return NULL_TREE;
567 /* Since d might be INT_MIN, we have to cast to
568 unsigned HOST_WIDE_INT before negating to avoid
569 undefined signed overflow. */
570 abs_d = (d >= 0
571 ? (unsigned HOST_WIDE_INT) d
572 : - (unsigned HOST_WIDE_INT) d);
574 /* n rem d = n rem -d */
575 if (code == TRUNC_MOD_EXPR && d < 0)
576 d = abs_d;
577 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
579 /* This case is not handled correctly below. */
580 mode = -2;
581 continue;
583 if (abs_d <= 1)
585 mode = -2;
586 continue;
589 choose_multiplier (abs_d, prec, prec - 1, &ml,
590 &post_shift, &dummy_int);
591 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
593 this_mode = 4 + (d < 0);
594 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
596 else
597 this_mode = 2 + (d < 0);
599 mulc[i] = ml;
600 post_shifts[i] = post_shift;
601 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
602 || post_shift >= prec
603 || pre_shifts[i] >= prec)
604 this_mode = -2;
606 if (i == 0)
607 mode = this_mode;
608 else if (mode != this_mode)
609 mode = -2;
612 vec = XALLOCAVEC (tree, nunits);
614 if (use_pow2)
616 tree addend = NULL_TREE;
617 if (sign_p == SIGNED)
619 tree uns_type;
621 /* Both division and remainder sequences need
622 op0 < 0 ? mask : 0 computed. It can be either computed as
623 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
624 if none of the shifts is 0, or as the conditional. */
625 for (i = 0; i < nunits; i++)
626 if (shifts[i] == 0)
627 break;
628 uns_type
629 = build_vector_type (build_nonstandard_integer_type (prec, 1),
630 nunits);
631 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
633 for (i = 0; i < nunits; i++)
634 shift_temps[i] = prec - 1;
635 cur_op = add_rshift (gsi, type, op0, shift_temps);
636 if (cur_op != NULL_TREE)
638 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
639 uns_type, cur_op);
640 for (i = 0; i < nunits; i++)
641 shift_temps[i] = prec - shifts[i];
642 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
643 if (cur_op != NULL_TREE)
644 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
645 type, cur_op);
648 if (addend == NULL_TREE
649 && expand_vec_cond_expr_p (type, type))
651 tree zero, cst, cond;
652 gimple stmt;
654 zero = build_zero_cst (type);
655 cond = build2 (LT_EXPR, type, op0, zero);
656 for (i = 0; i < nunits; i++)
657 vec[i] = build_int_cst (TREE_TYPE (type),
658 ((unsigned HOST_WIDE_INT) 1
659 << shifts[i]) - 1);
660 cst = build_vector (type, vec);
661 addend = make_ssa_name (type);
662 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
663 cst, zero);
664 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
667 if (code == TRUNC_DIV_EXPR)
669 if (sign_p == UNSIGNED)
671 /* q = op0 >> shift; */
672 cur_op = add_rshift (gsi, type, op0, shifts);
673 if (cur_op != NULL_TREE)
674 return cur_op;
676 else if (addend != NULL_TREE)
678 /* t1 = op0 + addend;
679 q = t1 >> shift; */
680 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
681 if (op != unknown_optab
682 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
684 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
685 cur_op = add_rshift (gsi, type, cur_op, shifts);
686 if (cur_op != NULL_TREE)
687 return cur_op;
691 else
693 tree mask;
694 for (i = 0; i < nunits; i++)
695 vec[i] = build_int_cst (TREE_TYPE (type),
696 ((unsigned HOST_WIDE_INT) 1
697 << shifts[i]) - 1);
698 mask = build_vector (type, vec);
699 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
700 if (op != unknown_optab
701 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
703 if (sign_p == UNSIGNED)
704 /* r = op0 & mask; */
705 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
706 else if (addend != NULL_TREE)
708 /* t1 = op0 + addend;
709 t2 = t1 & mask;
710 r = t2 - addend; */
711 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
712 if (op != unknown_optab
713 && optab_handler (op, TYPE_MODE (type))
714 != CODE_FOR_nothing)
716 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
717 addend);
718 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
719 cur_op, mask);
720 op = optab_for_tree_code (MINUS_EXPR, type,
721 optab_default);
722 if (op != unknown_optab
723 && optab_handler (op, TYPE_MODE (type))
724 != CODE_FOR_nothing)
725 return gimplify_build2 (gsi, MINUS_EXPR, type,
726 cur_op, addend);
733 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
734 return NULL_TREE;
736 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
737 return NULL_TREE;
739 cur_op = op0;
741 switch (mode)
743 case 0:
744 gcc_assert (sign_p == UNSIGNED);
745 /* t1 = oprnd0 >> pre_shift;
746 t2 = t1 h* ml;
747 q = t2 >> post_shift; */
748 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
749 if (cur_op == NULL_TREE)
750 return NULL_TREE;
751 break;
752 case 1:
753 gcc_assert (sign_p == UNSIGNED);
754 for (i = 0; i < nunits; i++)
756 shift_temps[i] = 1;
757 post_shifts[i]--;
759 break;
760 case 2:
761 case 3:
762 case 4:
763 case 5:
764 gcc_assert (sign_p == SIGNED);
765 for (i = 0; i < nunits; i++)
766 shift_temps[i] = prec - 1;
767 break;
768 default:
769 return NULL_TREE;
772 for (i = 0; i < nunits; i++)
773 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
774 mulcst = build_vector (type, vec);
776 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
778 switch (mode)
780 case 0:
781 /* t1 = oprnd0 >> pre_shift;
782 t2 = t1 h* ml;
783 q = t2 >> post_shift; */
784 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
785 break;
786 case 1:
787 /* t1 = oprnd0 h* ml;
788 t2 = oprnd0 - t1;
789 t3 = t2 >> 1;
790 t4 = t1 + t3;
791 q = t4 >> (post_shift - 1); */
792 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
793 if (op == unknown_optab
794 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
795 return NULL_TREE;
796 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
797 tem = add_rshift (gsi, type, tem, shift_temps);
798 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
799 if (op == unknown_optab
800 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
801 return NULL_TREE;
802 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
803 cur_op = add_rshift (gsi, type, tem, post_shifts);
804 if (cur_op == NULL_TREE)
805 return NULL_TREE;
806 break;
807 case 2:
808 case 3:
809 case 4:
810 case 5:
811 /* t1 = oprnd0 h* ml;
812 t2 = t1; [ iff (mode & 2) != 0 ]
813 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
814 t3 = t2 >> post_shift;
815 t4 = oprnd0 >> (prec - 1);
816 q = t3 - t4; [ iff (mode & 1) == 0 ]
817 q = t4 - t3; [ iff (mode & 1) != 0 ] */
818 if ((mode & 2) == 0)
820 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
821 if (op == unknown_optab
822 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
823 return NULL_TREE;
824 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
826 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
827 if (cur_op == NULL_TREE)
828 return NULL_TREE;
829 tem = add_rshift (gsi, type, op0, shift_temps);
830 if (tem == NULL_TREE)
831 return NULL_TREE;
832 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
833 if (op == unknown_optab
834 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
835 return NULL_TREE;
836 if ((mode & 1) == 0)
837 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
838 else
839 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
840 break;
841 default:
842 gcc_unreachable ();
845 if (code == TRUNC_DIV_EXPR)
846 return cur_op;
848 /* We divided. Now finish by:
849 t1 = q * oprnd1;
850 r = oprnd0 - t1; */
851 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
852 if (op == unknown_optab
853 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
854 return NULL_TREE;
855 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
856 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
857 if (op == unknown_optab
858 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
859 return NULL_TREE;
860 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
863 /* Expand a vector condition to scalars, by using many conditions
864 on the vector's elements. */
865 static void
866 expand_vector_condition (gimple_stmt_iterator *gsi)
868 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
869 tree type = gimple_expr_type (stmt);
870 tree a = gimple_assign_rhs1 (stmt);
871 tree a1 = a;
872 tree a2;
873 bool a_is_comparison = false;
874 tree b = gimple_assign_rhs2 (stmt);
875 tree c = gimple_assign_rhs3 (stmt);
876 vec<constructor_elt, va_gc> *v;
877 tree constr;
878 tree inner_type = TREE_TYPE (type);
879 tree cond_type = TREE_TYPE (TREE_TYPE (a));
880 tree comp_inner_type = cond_type;
881 tree width = TYPE_SIZE (inner_type);
882 tree index = bitsize_int (0);
883 int nunits = TYPE_VECTOR_SUBPARTS (type);
884 int i;
885 location_t loc = gimple_location (gsi_stmt (*gsi));
887 if (!is_gimple_val (a))
889 gcc_assert (COMPARISON_CLASS_P (a));
890 a_is_comparison = true;
891 a1 = TREE_OPERAND (a, 0);
892 a2 = TREE_OPERAND (a, 1);
893 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
896 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
897 return;
899 /* TODO: try and find a smaller vector type. */
901 warning_at (loc, OPT_Wvector_operation_performance,
902 "vector condition will be expanded piecewise");
904 vec_alloc (v, nunits);
905 for (i = 0; i < nunits;
906 i++, index = int_const_binop (PLUS_EXPR, index, width))
908 tree aa, result;
909 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
910 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
911 if (a_is_comparison)
913 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
914 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
915 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
917 else
918 aa = tree_vec_extract (gsi, cond_type, a, width, index);
919 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
920 constructor_elt ce = {NULL_TREE, result};
921 v->quick_push (ce);
924 constr = build_constructor (type, v);
925 gimple_assign_set_rhs_from_tree (gsi, constr);
926 update_stmt (gsi_stmt (*gsi));
929 static tree
930 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
931 gassign *assign, enum tree_code code)
933 machine_mode compute_mode = TYPE_MODE (compute_type);
935 /* If the compute mode is not a vector mode (hence we are not decomposing
936 a BLKmode vector to smaller, hardware-supported vectors), we may want
937 to expand the operations in parallel. */
938 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
939 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
940 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
941 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
942 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
943 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
944 switch (code)
946 case PLUS_EXPR:
947 case MINUS_EXPR:
948 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
949 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
950 gimple_assign_rhs1 (assign),
951 gimple_assign_rhs2 (assign), code);
952 break;
954 case NEGATE_EXPR:
955 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
956 return expand_vector_addition (gsi, do_unop, do_negate, type,
957 gimple_assign_rhs1 (assign),
958 NULL_TREE, code);
959 break;
961 case BIT_AND_EXPR:
962 case BIT_IOR_EXPR:
963 case BIT_XOR_EXPR:
964 return expand_vector_parallel (gsi, do_binop, type,
965 gimple_assign_rhs1 (assign),
966 gimple_assign_rhs2 (assign), code);
968 case BIT_NOT_EXPR:
969 return expand_vector_parallel (gsi, do_unop, type,
970 gimple_assign_rhs1 (assign),
971 NULL_TREE, code);
972 case EQ_EXPR:
973 case NE_EXPR:
974 case GT_EXPR:
975 case LT_EXPR:
976 case GE_EXPR:
977 case LE_EXPR:
978 case UNEQ_EXPR:
979 case UNGT_EXPR:
980 case UNLT_EXPR:
981 case UNGE_EXPR:
982 case UNLE_EXPR:
983 case LTGT_EXPR:
984 case ORDERED_EXPR:
985 case UNORDERED_EXPR:
987 tree rhs1 = gimple_assign_rhs1 (assign);
988 tree rhs2 = gimple_assign_rhs2 (assign);
990 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
993 case TRUNC_DIV_EXPR:
994 case TRUNC_MOD_EXPR:
996 tree rhs1 = gimple_assign_rhs1 (assign);
997 tree rhs2 = gimple_assign_rhs2 (assign);
998 tree ret;
1000 if (!optimize
1001 || !VECTOR_INTEGER_TYPE_P (type)
1002 || TREE_CODE (rhs2) != VECTOR_CST
1003 || !VECTOR_MODE_P (TYPE_MODE (type)))
1004 break;
1006 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1007 if (ret != NULL_TREE)
1008 return ret;
1009 break;
1012 default:
1013 break;
1016 if (TREE_CODE_CLASS (code) == tcc_unary)
1017 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1018 gimple_assign_rhs1 (assign),
1019 NULL_TREE, code);
1020 else
1021 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1022 gimple_assign_rhs1 (assign),
1023 gimple_assign_rhs2 (assign), code);
1026 /* Try to optimize
1027 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1028 style stmts into:
1029 _9 = { b_7, b_7, b_7, b_7 };
1030 a_5 = _9 + { 0, 3, 6, 9 };
1031 because vector splat operation is usually more efficient
1032 than piecewise initialization of the vector. */
1034 static void
1035 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1037 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1038 tree lhs = gimple_assign_lhs (stmt);
1039 tree rhs = gimple_assign_rhs1 (stmt);
1040 tree type = TREE_TYPE (rhs);
1041 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1042 bool all_same = true;
1043 constructor_elt *elt;
1044 tree *cst;
1045 gimple g;
1046 tree base = NULL_TREE;
1047 optab op;
1049 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1050 return;
1051 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1052 if (op == unknown_optab
1053 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1054 return;
1055 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1056 if (TREE_CODE (elt->value) != SSA_NAME
1057 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1058 return;
1059 else
1061 tree this_base = elt->value;
1062 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1063 all_same = false;
1064 for (j = 0; j < nelts + 1; j++)
1066 g = SSA_NAME_DEF_STMT (this_base);
1067 if (is_gimple_assign (g)
1068 && gimple_assign_rhs_code (g) == PLUS_EXPR
1069 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1070 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1071 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1072 this_base = gimple_assign_rhs1 (g);
1073 else
1074 break;
1076 if (i == 0)
1077 base = this_base;
1078 else if (this_base != base)
1079 return;
1081 if (all_same)
1082 return;
1083 cst = XALLOCAVEC (tree, nelts);
1084 for (i = 0; i < nelts; i++)
1086 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1087 cst[i] = build_zero_cst (TREE_TYPE (base));
1088 while (this_base != base)
1090 g = SSA_NAME_DEF_STMT (this_base);
1091 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1092 cst[i], gimple_assign_rhs2 (g));
1093 if (cst[i] == NULL_TREE
1094 || TREE_CODE (cst[i]) != INTEGER_CST
1095 || TREE_OVERFLOW (cst[i]))
1096 return;
1097 this_base = gimple_assign_rhs1 (g);
1100 for (i = 0; i < nelts; i++)
1101 CONSTRUCTOR_ELT (rhs, i)->value = base;
1102 g = gimple_build_assign (make_ssa_name (type), rhs);
1103 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1104 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1105 build_vector (type, cst));
1106 gsi_replace (gsi, g, false);
1109 /* Return a type for the widest vector mode whose components are of type
1110 TYPE, or NULL_TREE if none is found. */
1112 static tree
1113 type_for_widest_vector_mode (tree type, optab op)
1115 machine_mode inner_mode = TYPE_MODE (type);
1116 machine_mode best_mode = VOIDmode, mode;
1117 int best_nunits = 0;
1119 if (SCALAR_FLOAT_MODE_P (inner_mode))
1120 mode = MIN_MODE_VECTOR_FLOAT;
1121 else if (SCALAR_FRACT_MODE_P (inner_mode))
1122 mode = MIN_MODE_VECTOR_FRACT;
1123 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1124 mode = MIN_MODE_VECTOR_UFRACT;
1125 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1126 mode = MIN_MODE_VECTOR_ACCUM;
1127 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1128 mode = MIN_MODE_VECTOR_UACCUM;
1129 else
1130 mode = MIN_MODE_VECTOR_INT;
1132 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1133 if (GET_MODE_INNER (mode) == inner_mode
1134 && GET_MODE_NUNITS (mode) > best_nunits
1135 && optab_handler (op, mode) != CODE_FOR_nothing)
1136 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1138 if (best_mode == VOIDmode)
1139 return NULL_TREE;
1140 else
1141 return build_vector_type_for_mode (type, best_mode);
1145 /* Build a reference to the element of the vector VECT. Function
1146 returns either the element itself, either BIT_FIELD_REF, or an
1147 ARRAY_REF expression.
1149 GSI is required to insert temporary variables while building a
1150 refernece to the element of the vector VECT.
1152 PTMPVEC is a pointer to the temporary variable for caching
1153 purposes. In case when PTMPVEC is NULL new temporary variable
1154 will be created. */
1155 static tree
1156 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1158 tree vect_type, vect_elt_type;
1159 gimple asgn;
1160 tree tmpvec;
1161 tree arraytype;
1162 bool need_asgn = true;
1163 unsigned int elements;
1165 vect_type = TREE_TYPE (vect);
1166 vect_elt_type = TREE_TYPE (vect_type);
1167 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1169 if (TREE_CODE (idx) == INTEGER_CST)
1171 unsigned HOST_WIDE_INT index;
1173 /* Given that we're about to compute a binary modulus,
1174 we don't care about the high bits of the value. */
1175 index = TREE_INT_CST_LOW (idx);
1176 if (!tree_fits_uhwi_p (idx) || index >= elements)
1178 index &= elements - 1;
1179 idx = build_int_cst (TREE_TYPE (idx), index);
1182 /* When lowering a vector statement sequence do some easy
1183 simplification by looking through intermediate vector results. */
1184 if (TREE_CODE (vect) == SSA_NAME)
1186 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1187 if (is_gimple_assign (def_stmt)
1188 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1189 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1190 vect = gimple_assign_rhs1 (def_stmt);
1193 if (TREE_CODE (vect) == VECTOR_CST)
1194 return VECTOR_CST_ELT (vect, index);
1195 else if (TREE_CODE (vect) == CONSTRUCTOR
1196 && (CONSTRUCTOR_NELTS (vect) == 0
1197 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1198 != VECTOR_TYPE))
1200 if (index < CONSTRUCTOR_NELTS (vect))
1201 return CONSTRUCTOR_ELT (vect, index)->value;
1202 return build_zero_cst (vect_elt_type);
1204 else
1206 tree size = TYPE_SIZE (vect_elt_type);
1207 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1208 size);
1209 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1213 if (!ptmpvec)
1214 tmpvec = create_tmp_var (vect_type, "vectmp");
1215 else if (!*ptmpvec)
1216 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1217 else
1219 tmpvec = *ptmpvec;
1220 need_asgn = false;
1223 if (need_asgn)
1225 TREE_ADDRESSABLE (tmpvec) = 1;
1226 asgn = gimple_build_assign (tmpvec, vect);
1227 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1230 arraytype = build_array_type_nelts (vect_elt_type, elements);
1231 return build4 (ARRAY_REF, vect_elt_type,
1232 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1233 idx, NULL_TREE, NULL_TREE);
1236 /* Check if VEC_PERM_EXPR within the given setting is supported
1237 by hardware, or lower it piecewise.
1239 When VEC_PERM_EXPR has the same first and second operands:
1240 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1241 {v0[mask[0]], v0[mask[1]], ...}
1242 MASK and V0 must have the same number of elements.
1244 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1245 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1246 V0 and V1 must have the same type. MASK, V0, V1 must have the
1247 same number of arguments. */
1249 static void
1250 lower_vec_perm (gimple_stmt_iterator *gsi)
1252 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1253 tree mask = gimple_assign_rhs3 (stmt);
1254 tree vec0 = gimple_assign_rhs1 (stmt);
1255 tree vec1 = gimple_assign_rhs2 (stmt);
1256 tree vect_type = TREE_TYPE (vec0);
1257 tree mask_type = TREE_TYPE (mask);
1258 tree vect_elt_type = TREE_TYPE (vect_type);
1259 tree mask_elt_type = TREE_TYPE (mask_type);
1260 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1261 vec<constructor_elt, va_gc> *v;
1262 tree constr, t, si, i_val;
1263 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1264 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1265 location_t loc = gimple_location (gsi_stmt (*gsi));
1266 unsigned i;
1268 if (TREE_CODE (mask) == SSA_NAME)
1270 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1271 if (is_gimple_assign (def_stmt)
1272 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1273 mask = gimple_assign_rhs1 (def_stmt);
1276 if (TREE_CODE (mask) == VECTOR_CST)
1278 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1280 for (i = 0; i < elements; ++i)
1281 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1282 & (2 * elements - 1));
1284 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1286 gimple_assign_set_rhs3 (stmt, mask);
1287 update_stmt (stmt);
1288 return;
1291 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1292 return;
1294 warning_at (loc, OPT_Wvector_operation_performance,
1295 "vector shuffling operation will be expanded piecewise");
1297 vec_alloc (v, elements);
1298 for (i = 0; i < elements; i++)
1300 si = size_int (i);
1301 i_val = vector_element (gsi, mask, si, &masktmp);
1303 if (TREE_CODE (i_val) == INTEGER_CST)
1305 unsigned HOST_WIDE_INT index;
1307 index = TREE_INT_CST_LOW (i_val);
1308 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1309 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1311 if (two_operand_p && (index & elements) != 0)
1312 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1313 else
1314 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1316 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1317 true, GSI_SAME_STMT);
1319 else
1321 tree cond = NULL_TREE, v0_val;
1323 if (two_operand_p)
1325 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1326 build_int_cst (mask_elt_type, elements));
1327 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1328 true, GSI_SAME_STMT);
1331 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1332 build_int_cst (mask_elt_type, elements - 1));
1333 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1334 true, GSI_SAME_STMT);
1336 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1337 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1338 true, GSI_SAME_STMT);
1340 if (two_operand_p)
1342 tree v1_val;
1344 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1345 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1346 true, GSI_SAME_STMT);
1348 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1349 cond, build_zero_cst (mask_elt_type));
1350 cond = fold_build3 (COND_EXPR, vect_elt_type,
1351 cond, v0_val, v1_val);
1352 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1353 true, GSI_SAME_STMT);
1355 else
1356 t = v0_val;
1359 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1362 constr = build_constructor (vect_type, v);
1363 gimple_assign_set_rhs_from_tree (gsi, constr);
1364 update_stmt (gsi_stmt (*gsi));
1367 /* Return type in which CODE operation with optab OP can be
1368 computed. */
1370 static tree
1371 get_compute_type (enum tree_code code, optab op, tree type)
1373 /* For very wide vectors, try using a smaller vector mode. */
1374 tree compute_type = type;
1375 if (op
1376 && (!VECTOR_MODE_P (TYPE_MODE (type))
1377 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1379 tree vector_compute_type
1380 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1381 if (vector_compute_type != NULL_TREE
1382 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1383 < TYPE_VECTOR_SUBPARTS (compute_type))
1384 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1385 != CODE_FOR_nothing))
1386 compute_type = vector_compute_type;
1389 /* If we are breaking a BLKmode vector into smaller pieces,
1390 type_for_widest_vector_mode has already looked into the optab,
1391 so skip these checks. */
1392 if (compute_type == type)
1394 machine_mode compute_mode = TYPE_MODE (compute_type);
1395 if (VECTOR_MODE_P (compute_mode))
1397 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1398 return compute_type;
1399 if (code == MULT_HIGHPART_EXPR
1400 && can_mult_highpart_p (compute_mode,
1401 TYPE_UNSIGNED (compute_type)))
1402 return compute_type;
1404 /* There is no operation in hardware, so fall back to scalars. */
1405 compute_type = TREE_TYPE (type);
1408 return compute_type;
1411 /* Helper function of expand_vector_operations_1. Return number of
1412 vector elements for vector types or 1 for other types. */
1414 static inline int
1415 count_type_subparts (tree type)
1417 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1420 static tree
1421 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1422 tree bitpos, tree bitsize, enum tree_code code)
1424 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1425 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1426 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1427 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1428 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1429 return gimplify_build3 (gsi, code, inner_type, cond, a, b);
1432 /* Expand a vector COND_EXPR to scalars, piecewise. */
1433 static void
1434 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1436 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1437 tree type = gimple_expr_type (stmt);
1438 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1439 machine_mode compute_mode = TYPE_MODE (compute_type);
1440 gcc_assert (compute_mode != BLKmode);
1441 tree lhs = gimple_assign_lhs (stmt);
1442 tree rhs2 = gimple_assign_rhs2 (stmt);
1443 tree rhs3 = gimple_assign_rhs3 (stmt);
1444 tree new_rhs;
1446 /* If the compute mode is not a vector mode (hence we are not decomposing
1447 a BLKmode vector to smaller, hardware-supported vectors), we may want
1448 to expand the operations in parallel. */
1449 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1450 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1451 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1452 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1453 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1454 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1455 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1456 COND_EXPR);
1457 else
1458 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1459 rhs2, rhs3, COND_EXPR);
1460 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1461 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1462 new_rhs);
1464 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1465 way to do it is change expand_vector_operation and its callees to
1466 return a tree_code, RHS1 and RHS2 instead of a tree. */
1467 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1468 update_stmt (gsi_stmt (*gsi));
1471 /* Process one statement. If we identify a vector operation, expand it. */
1473 static void
1474 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1476 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1477 enum tree_code code;
1478 optab op = unknown_optab;
1479 enum gimple_rhs_class rhs_class;
1480 tree new_rhs;
1482 /* Only consider code == GIMPLE_ASSIGN. */
1483 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1484 if (!stmt)
1485 return;
1487 code = gimple_assign_rhs_code (stmt);
1488 rhs_class = get_gimple_rhs_class (code);
1489 lhs = gimple_assign_lhs (stmt);
1491 if (code == VEC_PERM_EXPR)
1493 lower_vec_perm (gsi);
1494 return;
1497 if (code == VEC_COND_EXPR)
1499 expand_vector_condition (gsi);
1500 return;
1503 if (code == COND_EXPR
1504 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1505 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1507 expand_vector_scalar_condition (gsi);
1508 return;
1511 if (code == CONSTRUCTOR
1512 && TREE_CODE (lhs) == SSA_NAME
1513 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1514 && !gimple_clobber_p (stmt)
1515 && optimize)
1517 optimize_vector_constructor (gsi);
1518 return;
1521 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1522 return;
1524 rhs1 = gimple_assign_rhs1 (stmt);
1525 type = gimple_expr_type (stmt);
1526 if (rhs_class == GIMPLE_BINARY_RHS)
1527 rhs2 = gimple_assign_rhs2 (stmt);
1529 if (TREE_CODE (type) != VECTOR_TYPE)
1530 return;
1532 if (CONVERT_EXPR_CODE_P (code)
1533 || code == FLOAT_EXPR
1534 || code == FIX_TRUNC_EXPR
1535 || code == VIEW_CONVERT_EXPR)
1536 return;
1538 /* The signedness is determined from input argument. */
1539 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1540 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1541 type = TREE_TYPE (rhs1);
1543 /* For widening/narrowing vector operations, the relevant type is of the
1544 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1545 calculated in the same way above. */
1546 if (code == WIDEN_SUM_EXPR
1547 || code == VEC_WIDEN_MULT_HI_EXPR
1548 || code == VEC_WIDEN_MULT_LO_EXPR
1549 || code == VEC_WIDEN_MULT_EVEN_EXPR
1550 || code == VEC_WIDEN_MULT_ODD_EXPR
1551 || code == VEC_UNPACK_HI_EXPR
1552 || code == VEC_UNPACK_LO_EXPR
1553 || code == VEC_PACK_TRUNC_EXPR
1554 || code == VEC_PACK_SAT_EXPR
1555 || code == VEC_PACK_FIX_TRUNC_EXPR
1556 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1557 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1558 type = TREE_TYPE (rhs1);
1560 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1561 scalar */
1562 if (code == LSHIFT_EXPR
1563 || code == RSHIFT_EXPR
1564 || code == LROTATE_EXPR
1565 || code == RROTATE_EXPR)
1567 optab opv;
1569 /* Check whether we have vector <op> {x,x,x,x} where x
1570 could be a scalar variable or a constant. Transform
1571 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1572 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1574 tree first;
1575 gimple def_stmt;
1577 if ((TREE_CODE (rhs2) == VECTOR_CST
1578 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1579 || (TREE_CODE (rhs2) == SSA_NAME
1580 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1581 && gimple_assign_single_p (def_stmt)
1582 && (first = uniform_vector_p
1583 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1585 gimple_assign_set_rhs2 (stmt, first);
1586 update_stmt (stmt);
1587 rhs2 = first;
1591 opv = optab_for_tree_code (code, type, optab_vector);
1592 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1593 op = opv;
1594 else
1596 op = optab_for_tree_code (code, type, optab_scalar);
1598 compute_type = get_compute_type (code, op, type);
1599 if (compute_type == type)
1600 return;
1601 /* The rtl expander will expand vector/scalar as vector/vector
1602 if necessary. Pick one with wider vector type. */
1603 tree compute_vtype = get_compute_type (code, opv, type);
1604 if (count_type_subparts (compute_vtype)
1605 > count_type_subparts (compute_type))
1607 compute_type = compute_vtype;
1608 op = opv;
1612 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1614 if (compute_type == NULL_TREE)
1615 compute_type = get_compute_type (code, op, type);
1616 if (compute_type == type)
1617 return;
1618 /* Before splitting vector rotates into scalar rotates,
1619 see if we can't use vector shifts and BIT_IOR_EXPR
1620 instead. For vector by vector rotates we'd also
1621 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1622 for now, fold doesn't seem to create such rotates anyway. */
1623 if (compute_type == TREE_TYPE (type)
1624 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1626 optab oplv = vashl_optab, opl = ashl_optab;
1627 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1628 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1629 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1630 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1631 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1632 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1633 /* The rtl expander will expand vector/scalar as vector/vector
1634 if necessary. Pick one with wider vector type. */
1635 if (count_type_subparts (compute_lvtype)
1636 > count_type_subparts (compute_ltype))
1638 compute_ltype = compute_lvtype;
1639 opl = oplv;
1641 if (count_type_subparts (compute_rvtype)
1642 > count_type_subparts (compute_rtype))
1644 compute_rtype = compute_rvtype;
1645 opr = oprv;
1647 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1648 BIT_IOR_EXPR. */
1649 compute_type = compute_ltype;
1650 if (count_type_subparts (compute_type)
1651 > count_type_subparts (compute_rtype))
1652 compute_type = compute_rtype;
1653 if (count_type_subparts (compute_type)
1654 > count_type_subparts (compute_otype))
1655 compute_type = compute_otype;
1656 /* Verify all 3 operations can be performed in that type. */
1657 if (compute_type != TREE_TYPE (type))
1659 if (optab_handler (opl, TYPE_MODE (compute_type))
1660 == CODE_FOR_nothing
1661 || optab_handler (opr, TYPE_MODE (compute_type))
1662 == CODE_FOR_nothing
1663 || optab_handler (opo, TYPE_MODE (compute_type))
1664 == CODE_FOR_nothing)
1665 compute_type = TREE_TYPE (type);
1670 else
1671 op = optab_for_tree_code (code, type, optab_default);
1673 /* Optabs will try converting a negation into a subtraction, so
1674 look for it as well. TODO: negation of floating-point vectors
1675 might be turned into an exclusive OR toggling the sign bit. */
1676 if (op == unknown_optab
1677 && code == NEGATE_EXPR
1678 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1679 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1681 if (compute_type == NULL_TREE)
1682 compute_type = get_compute_type (code, op, type);
1683 if (compute_type == type)
1684 return;
1686 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1688 /* Leave expression untouched for later expansion. */
1689 if (new_rhs == NULL_TREE)
1690 return;
1692 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1693 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1694 new_rhs);
1696 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1697 way to do it is change expand_vector_operation and its callees to
1698 return a tree_code, RHS1 and RHS2 instead of a tree. */
1699 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1700 update_stmt (gsi_stmt (*gsi));
1703 /* Use this to lower vector operations introduced by the vectorizer,
1704 if it may need the bit-twiddling tricks implemented in this file. */
1706 static unsigned int
1707 expand_vector_operations (void)
1709 gimple_stmt_iterator gsi;
1710 basic_block bb;
1711 bool cfg_changed = false;
1713 FOR_EACH_BB_FN (bb, cfun)
1715 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1717 expand_vector_operations_1 (&gsi);
1718 /* ??? If we do not cleanup EH then we will ICE in
1719 verification. But in reality we have created wrong-code
1720 as we did not properly transition EH info and edges to
1721 the piecewise computations. */
1722 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1723 && gimple_purge_dead_eh_edges (bb))
1724 cfg_changed = true;
1728 return cfg_changed ? TODO_cleanup_cfg : 0;
1731 namespace {
1733 const pass_data pass_data_lower_vector =
1735 GIMPLE_PASS, /* type */
1736 "veclower", /* name */
1737 OPTGROUP_VEC, /* optinfo_flags */
1738 TV_NONE, /* tv_id */
1739 PROP_cfg, /* properties_required */
1740 PROP_gimple_lvec, /* properties_provided */
1741 0, /* properties_destroyed */
1742 0, /* todo_flags_start */
1743 TODO_update_ssa, /* todo_flags_finish */
1746 class pass_lower_vector : public gimple_opt_pass
1748 public:
1749 pass_lower_vector (gcc::context *ctxt)
1750 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1753 /* opt_pass methods: */
1754 virtual bool gate (function *fun)
1756 return !(fun->curr_properties & PROP_gimple_lvec);
1759 virtual unsigned int execute (function *)
1761 return expand_vector_operations ();
1764 }; // class pass_lower_vector
1766 } // anon namespace
1768 gimple_opt_pass *
1769 make_pass_lower_vector (gcc::context *ctxt)
1771 return new pass_lower_vector (ctxt);
1774 namespace {
1776 const pass_data pass_data_lower_vector_ssa =
1778 GIMPLE_PASS, /* type */
1779 "veclower2", /* name */
1780 OPTGROUP_VEC, /* optinfo_flags */
1781 TV_NONE, /* tv_id */
1782 PROP_cfg, /* properties_required */
1783 PROP_gimple_lvec, /* properties_provided */
1784 0, /* properties_destroyed */
1785 0, /* todo_flags_start */
1786 ( TODO_update_ssa
1787 | TODO_cleanup_cfg ), /* todo_flags_finish */
1790 class pass_lower_vector_ssa : public gimple_opt_pass
1792 public:
1793 pass_lower_vector_ssa (gcc::context *ctxt)
1794 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1797 /* opt_pass methods: */
1798 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1799 virtual unsigned int execute (function *)
1801 return expand_vector_operations ();
1804 }; // class pass_lower_vector_ssa
1806 } // anon namespace
1808 gimple_opt_pass *
1809 make_pass_lower_vector_ssa (gcc::context *ctxt)
1811 return new pass_lower_vector_ssa (ctxt);
1814 #include "gt-tree-vect-generic.h"