gcc/tree-vect-generic.c
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "rtl.h"
28 #include "ssa.h"
29 #include "options.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "langhooks.h"
33 #include "internal-fn.h"
34 #include "tree-eh.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "tree-cfg.h"
38 #include "tree-iterator.h"
39 #include "tree-pass.h"
40 #include "flags.h"
41 #include "diagnostic.h"
42 #include "target.h"
44 /* Need to include rtl.h, expr.h, etc. for optabs. */
45 #include "insn-config.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "calls.h"
50 #include "emit-rtl.h"
51 #include "varasm.h"
52 #include "stmt.h"
53 #include "expr.h"
54 #include "insn-codes.h"
55 #include "optabs.h"
58 static void expand_vector_operations_1 (gimple_stmt_iterator *);
61 /* Build a constant of type TYPE, made of VALUE's bits replicated
62 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
63 static tree
64 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
66 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
67 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
68 / HOST_BITS_PER_WIDE_INT;
69 unsigned HOST_WIDE_INT low, mask;
70 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
71 int i;
73 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
75 if (width == HOST_BITS_PER_WIDE_INT)
76 low = value;
77 else
79 mask = ((HOST_WIDE_INT)1 << width) - 1;
80 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
83 for (i = 0; i < n; i++)
84 a[i] = low;
86 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
87 return wide_int_to_tree
88 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
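/* Editor's note, illustrative only and not part of the original source:
   with TYPE of 32-bit word_mode, INNER_TYPE an 8-bit integer type and
   VALUE 0x7f, the mask is 0xff, LOW becomes 0x7f replicated into every
   byte, and the constant returned is 0x7f7f7f7f.  */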
91 static GTY(()) tree vector_inner_type;
92 static GTY(()) tree vector_last_type;
93 static GTY(()) int vector_last_nunits;
95 /* Return a suitable vector type made of NUNITS units each of mode
96 "word_mode" (the global variable). */
97 static tree
98 build_word_mode_vector_type (int nunits)
100 if (!vector_inner_type)
101 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
102 else if (vector_last_nunits == nunits)
104 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
105 return vector_last_type;
108 /* We build a new type, but we canonicalize it nevertheless,
109 because it still saves some memory. */
110 vector_last_nunits = nunits;
111 vector_last_type = type_hash_canon (nunits,
112 build_vector_type (vector_inner_type,
113 nunits));
114 return vector_last_type;
117 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
118 tree, tree, tree, tree, tree, enum tree_code);
120 static inline tree
121 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
122 tree t, tree bitsize, tree bitpos)
124 if (bitpos)
125 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
126 else
127 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
130 static tree
131 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
132 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
133 enum tree_code code)
135 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
136 return gimplify_build1 (gsi, code, inner_type, a);
139 static tree
140 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
141 tree bitpos, tree bitsize, enum tree_code code)
143 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
144 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
145 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
146 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
147 return gimplify_build2 (gsi, code, inner_type, a, b);
150 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
152 INNER_TYPE is the type of A's and B's elements.  The
154 returned expression is of a signed integer type with the
155 same size as INNER_TYPE. */
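/* Editor's note, illustrative only: for 32-bit float elements, each lane
   of the result is therefore a 32-bit signed integer holding -1 when the
   comparison is true and 0 when it is false.  */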
156 static tree
157 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
158 tree bitpos, tree bitsize, enum tree_code code)
160 tree comp_type;
162 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
163 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
165 comp_type = build_nonstandard_integer_type
166 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
168 return gimplify_build3 (gsi, COND_EXPR, comp_type,
169 fold_build2 (code, boolean_type_node, a, b),
170 build_int_cst (comp_type, -1),
171 build_int_cst (comp_type, 0));
174 /* Expand vector addition to scalars. This does bit twiddling
175 in order to increase parallelism:
177 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
178 (a ^ b) & 0x80808080
180 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
181 (a ^ ~b) & 0x80808080
183 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
185 This optimization should be done only if 4 vector items or more
186 fit into a word. */
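/* Editor's worked example, not part of the original source and assuming
   8-bit elements packed into a word: for bytes a = 0x7f and b = 0x01,
   (a & 0x7f) + (b & 0x7f) = 0x80 and (a ^ b) & 0x80 = 0x00, so the
   per-byte sum is 0x80.  Masking the high bit before the word-wide add
   keeps carries from spilling into the neighbouring element, and the
   final XOR restores each element's own high bit.  */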
187 static tree
188 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
189 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
190 enum tree_code code)
192 tree inner_type = TREE_TYPE (TREE_TYPE (a));
193 unsigned HOST_WIDE_INT max;
194 tree low_bits, high_bits, a_low, b_low, result_low, signs;
196 max = GET_MODE_MASK (TYPE_MODE (inner_type));
197 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
198 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
200 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
201 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
203 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
204 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
205 if (code == PLUS_EXPR)
206 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
207 else
209 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
210 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
213 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
214 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
215 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
218 static tree
219 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
220 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
221 tree bitsize ATTRIBUTE_UNUSED,
222 enum tree_code code ATTRIBUTE_UNUSED)
224 tree inner_type = TREE_TYPE (TREE_TYPE (b));
225 HOST_WIDE_INT max;
226 tree low_bits, high_bits, b_low, result_low, signs;
228 max = GET_MODE_MASK (TYPE_MODE (inner_type));
229 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
230 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
232 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
234 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
235 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
236 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
237 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
238 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
241 /* Expand a vector operation to scalars, by using many operations
242 whose type is the vector type's inner type. */
243 static tree
244 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
245 tree type, tree inner_type,
246 tree a, tree b, enum tree_code code)
248 vec<constructor_elt, va_gc> *v;
249 tree part_width = TYPE_SIZE (inner_type);
250 tree index = bitsize_int (0);
251 int nunits = TYPE_VECTOR_SUBPARTS (type);
252 int delta = tree_to_uhwi (part_width)
253 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
254 int i;
255 location_t loc = gimple_location (gsi_stmt (*gsi));
257 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
258 warning_at (loc, OPT_Wvector_operation_performance,
259 "vector operation will be expanded piecewise");
260 else
261 warning_at (loc, OPT_Wvector_operation_performance,
262 "vector operation will be expanded in parallel");
264 vec_alloc (v, (nunits + delta - 1) / delta);
265 for (i = 0; i < nunits;
266 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
268 tree result = f (gsi, inner_type, a, b, index, part_width, code);
269 constructor_elt ce = {NULL_TREE, result};
270 v->quick_push (ce);
273 return build_constructor (type, v);
276 /* Expand a vector operation to scalars with the freedom to use
277 a scalar integer type, or to use a different size for the items
278 in the vector type. */
279 static tree
280 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
281 tree a, tree b,
282 enum tree_code code)
284 tree result, compute_type;
285 machine_mode mode;
286 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
287 location_t loc = gimple_location (gsi_stmt (*gsi));
289 /* We have three strategies. If the type is already correct, just do
290 the operation an element at a time. Else, if the vector is wider than
291 one word, do it a word at a time; finally, if the vector is smaller
292 than one word, do it as a scalar. */
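/* Editor's note, illustrative only: on a hypothetical 32-bit target, a
   bitwise AND of two 4 x 8-bit vectors has TYPE_SIZE_UNIT 4, so N_WORDS
   is 1 and the final branch below performs the whole operation as a
   single word-sized scalar AND.  */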
293 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
294 return expand_vector_piecewise (gsi, f,
295 type, TREE_TYPE (type),
296 a, b, code);
297 else if (n_words > 1)
299 tree word_type = build_word_mode_vector_type (n_words);
300 result = expand_vector_piecewise (gsi, f,
301 word_type, TREE_TYPE (word_type),
302 a, b, code);
303 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
304 GSI_SAME_STMT);
306 else
308 /* Use a single scalar operation with a mode no wider than word_mode. */
309 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
310 compute_type = lang_hooks.types.type_for_mode (mode, 1);
311 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
312 warning_at (loc, OPT_Wvector_operation_performance,
313 "vector operation will be expanded with a "
314 "single scalar operation");
317 return result;
320 /* Expand a vector operation to scalars; for integer types we can use
321 special bit twiddling tricks to do the sums a word at a time, using
322 function F_PARALLEL instead of F. These tricks are done only if
323 they can process at least four items, that is, only if the vector
324 holds at least four items and if a word can hold four items. */
325 static tree
326 expand_vector_addition (gimple_stmt_iterator *gsi,
327 elem_op_func f, elem_op_func f_parallel,
328 tree type, tree a, tree b, enum tree_code code)
330 int parts_per_word = UNITS_PER_WORD
331 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
333 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
334 && parts_per_word >= 4
335 && TYPE_VECTOR_SUBPARTS (type) >= 4)
336 return expand_vector_parallel (gsi, f_parallel,
337 type, a, b, code);
338 else
339 return expand_vector_piecewise (gsi, f,
340 type, TREE_TYPE (type),
341 a, b, code);
344 /* Try to expand vector comparison expression OP0 CODE OP1 by
345 querying the optab to see whether the following expression:
346 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
347 can be expanded. */
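/* Editor's note: if the target can expand the equivalent VEC_COND_EXPR,
   NULL_TREE is returned and the comparison is left for the RTL expander;
   otherwise it is lowered piecewise through do_compare above.  */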
348 static tree
349 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
350 tree op1, enum tree_code code)
352 tree t;
353 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
354 t = expand_vector_piecewise (gsi, do_compare, type,
355 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
356 else
357 t = NULL_TREE;
359 return t;
362 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
363 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
364 the result if successful, otherwise return NULL_TREE. */
365 static tree
366 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
368 optab op;
369 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
370 bool scalar_shift = true;
372 for (i = 1; i < nunits; i++)
374 if (shiftcnts[i] != shiftcnts[0])
375 scalar_shift = false;
378 if (scalar_shift && shiftcnts[0] == 0)
379 return op0;
381 if (scalar_shift)
383 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
384 if (op != unknown_optab
385 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
386 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
387 build_int_cst (NULL_TREE, shiftcnts[0]));
390 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
391 if (op != unknown_optab
392 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
394 tree *vec = XALLOCAVEC (tree, nunits);
395 for (i = 0; i < nunits; i++)
396 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
397 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
398 build_vector (type, vec));
401 return NULL_TREE;
404 /* Try to expand integer vector division by constant using
405 widening multiply, shifts and additions. */
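/* Editor's example, not part of the original source: an unsigned 32-bit
   division by 3 can be expanded as q = (x h* 0xAAAAAAAB) >> 1, i.e. a
   MULT_HIGHPART_EXPR by the magic multiplier followed by a right shift
   by 1; the analysis below computes such multiplier/shift pairs per
   vector element via choose_multiplier.  */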
406 static tree
407 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
408 tree op1, enum tree_code code)
410 bool use_pow2 = true;
411 bool has_vector_shift = true;
412 int mode = -1, this_mode;
413 int pre_shift = -1, post_shift;
414 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
415 int *shifts = XALLOCAVEC (int, nunits * 4);
416 int *pre_shifts = shifts + nunits;
417 int *post_shifts = pre_shifts + nunits;
418 int *shift_temps = post_shifts + nunits;
419 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
420 int prec = TYPE_PRECISION (TREE_TYPE (type));
421 int dummy_int;
422 unsigned int i;
423 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
424 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
425 tree *vec;
426 tree cur_op, mulcst, tem;
427 optab op;
429 if (prec > HOST_BITS_PER_WIDE_INT)
430 return NULL_TREE;
432 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
433 if (op == unknown_optab
434 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
435 has_vector_shift = false;
437 /* Analysis phase.  Determine whether all op1 elements are powers
438 of two, in which case it is possible to expand the operation using shifts
439 (or, for remainder, using masking).  Additionally compute the multiplicative constants
440 and pre and post shifts if the division is to be expanded using
441 widening or high part multiplication plus shifts. */
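/* Editor's summary of the MODE encoding used below, inferred from the
   code rather than taken from the original comments: -1 means not yet
   determined, -2 means the highpart-multiply strategy is abandoned,
   0 and 1 are the unsigned cases (1 when the multiplier needs the extra
   subtract/add-and-shift fixup), and 2..5 are the signed cases (+1 when
   the divisor is negative, +2 when the multiplier has its top bit set).  */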
442 for (i = 0; i < nunits; i++)
444 tree cst = VECTOR_CST_ELT (op1, i);
445 unsigned HOST_WIDE_INT ml;
447 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
448 return NULL_TREE;
449 pre_shifts[i] = 0;
450 post_shifts[i] = 0;
451 mulc[i] = 0;
452 if (use_pow2
453 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
454 use_pow2 = false;
455 if (use_pow2)
457 shifts[i] = tree_log2 (cst);
458 if (shifts[i] != shifts[0]
459 && code == TRUNC_DIV_EXPR
460 && !has_vector_shift)
461 use_pow2 = false;
463 if (mode == -2)
464 continue;
465 if (sign_p == UNSIGNED)
467 unsigned HOST_WIDE_INT mh;
468 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
470 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
471 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
472 return NULL_TREE;
474 if (d <= 1)
476 mode = -2;
477 continue;
480 /* Find a suitable multiplier and right shift count
481 instead of multiplying with D. */
482 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
484 /* If the suggested multiplier is more than SIZE bits, we can
485 do better for even divisors, using an initial right shift. */
486 if ((mh != 0 && (d & 1) == 0)
487 || (!has_vector_shift && pre_shift != -1))
489 if (has_vector_shift)
490 pre_shift = floor_log2 (d & -d);
491 else if (pre_shift == -1)
493 unsigned int j;
494 for (j = 0; j < nunits; j++)
496 tree cst2 = VECTOR_CST_ELT (op1, j);
497 unsigned HOST_WIDE_INT d2;
498 int this_pre_shift;
500 if (!tree_fits_uhwi_p (cst2))
501 return NULL_TREE;
502 d2 = tree_to_uhwi (cst2) & mask;
503 if (d2 == 0)
504 return NULL_TREE;
505 this_pre_shift = floor_log2 (d2 & -d2);
506 if (pre_shift == -1 || this_pre_shift < pre_shift)
507 pre_shift = this_pre_shift;
509 if (i != 0 && pre_shift != 0)
511 /* Restart. */
512 i = -1U;
513 mode = -1;
514 continue;
517 if (pre_shift != 0)
519 if ((d >> pre_shift) <= 1)
521 mode = -2;
522 continue;
524 mh = choose_multiplier (d >> pre_shift, prec,
525 prec - pre_shift,
526 &ml, &post_shift, &dummy_int);
527 gcc_assert (!mh);
528 pre_shifts[i] = pre_shift;
531 if (!mh)
532 this_mode = 0;
533 else
534 this_mode = 1;
536 else
538 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
539 unsigned HOST_WIDE_INT abs_d;
541 if (d == -1)
542 return NULL_TREE;
544 /* Since d might be INT_MIN, we have to cast to
545 unsigned HOST_WIDE_INT before negating to avoid
546 undefined signed overflow. */
547 abs_d = (d >= 0
548 ? (unsigned HOST_WIDE_INT) d
549 : - (unsigned HOST_WIDE_INT) d);
551 /* n rem d = n rem -d */
552 if (code == TRUNC_MOD_EXPR && d < 0)
553 d = abs_d;
554 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
556 /* This case is not handled correctly below. */
557 mode = -2;
558 continue;
560 if (abs_d <= 1)
562 mode = -2;
563 continue;
566 choose_multiplier (abs_d, prec, prec - 1, &ml,
567 &post_shift, &dummy_int);
568 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
570 this_mode = 4 + (d < 0);
571 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
573 else
574 this_mode = 2 + (d < 0);
576 mulc[i] = ml;
577 post_shifts[i] = post_shift;
578 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
579 || post_shift >= prec
580 || pre_shifts[i] >= prec)
581 this_mode = -2;
583 if (i == 0)
584 mode = this_mode;
585 else if (mode != this_mode)
586 mode = -2;
589 vec = XALLOCAVEC (tree, nunits);
591 if (use_pow2)
593 tree addend = NULL_TREE;
594 if (sign_p == SIGNED)
596 tree uns_type;
598 /* Both division and remainder sequences need
599 op0 < 0 ? mask : 0 computed. It can be either computed as
600 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
601 if none of the shifts is 0, or as the conditional. */
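/* Editor's example, illustrative only: for signed 32-bit division by 4,
   shifts[i] is 2, so the addend becomes
   (type) (((uns_type) (op0 >> 31)) >> 30), i.e. 3 for negative elements
   and 0 otherwise; adding it before the arithmetic right shift makes the
   quotient round toward zero.  */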
602 for (i = 0; i < nunits; i++)
603 if (shifts[i] == 0)
604 break;
605 uns_type
606 = build_vector_type (build_nonstandard_integer_type (prec, 1),
607 nunits);
608 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
610 for (i = 0; i < nunits; i++)
611 shift_temps[i] = prec - 1;
612 cur_op = add_rshift (gsi, type, op0, shift_temps);
613 if (cur_op != NULL_TREE)
615 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
616 uns_type, cur_op);
617 for (i = 0; i < nunits; i++)
618 shift_temps[i] = prec - shifts[i];
619 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
620 if (cur_op != NULL_TREE)
621 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
622 type, cur_op);
625 if (addend == NULL_TREE
626 && expand_vec_cond_expr_p (type, type))
628 tree zero, cst, cond;
629 gimple stmt;
631 zero = build_zero_cst (type);
632 cond = build2 (LT_EXPR, type, op0, zero);
633 for (i = 0; i < nunits; i++)
634 vec[i] = build_int_cst (TREE_TYPE (type),
635 ((unsigned HOST_WIDE_INT) 1
636 << shifts[i]) - 1);
637 cst = build_vector (type, vec);
638 addend = make_ssa_name (type);
639 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
640 cst, zero);
641 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
644 if (code == TRUNC_DIV_EXPR)
646 if (sign_p == UNSIGNED)
648 /* q = op0 >> shift; */
649 cur_op = add_rshift (gsi, type, op0, shifts);
650 if (cur_op != NULL_TREE)
651 return cur_op;
653 else if (addend != NULL_TREE)
655 /* t1 = op0 + addend;
656 q = t1 >> shift; */
657 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
658 if (op != unknown_optab
659 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
661 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
662 cur_op = add_rshift (gsi, type, cur_op, shifts);
663 if (cur_op != NULL_TREE)
664 return cur_op;
668 else
670 tree mask;
671 for (i = 0; i < nunits; i++)
672 vec[i] = build_int_cst (TREE_TYPE (type),
673 ((unsigned HOST_WIDE_INT) 1
674 << shifts[i]) - 1);
675 mask = build_vector (type, vec);
676 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
677 if (op != unknown_optab
678 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
680 if (sign_p == UNSIGNED)
681 /* r = op0 & mask; */
682 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
683 else if (addend != NULL_TREE)
685 /* t1 = op0 + addend;
686 t2 = t1 & mask;
687 r = t2 - addend; */
688 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
689 if (op != unknown_optab
690 && optab_handler (op, TYPE_MODE (type))
691 != CODE_FOR_nothing)
693 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
694 addend);
695 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
696 cur_op, mask);
697 op = optab_for_tree_code (MINUS_EXPR, type,
698 optab_default);
699 if (op != unknown_optab
700 && optab_handler (op, TYPE_MODE (type))
701 != CODE_FOR_nothing)
702 return gimplify_build2 (gsi, MINUS_EXPR, type,
703 cur_op, addend);
710 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
711 return NULL_TREE;
713 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
714 return NULL_TREE;
716 cur_op = op0;
718 switch (mode)
720 case 0:
721 gcc_assert (sign_p == UNSIGNED);
722 /* t1 = oprnd0 >> pre_shift;
723 t2 = t1 h* ml;
724 q = t2 >> post_shift; */
725 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
726 if (cur_op == NULL_TREE)
727 return NULL_TREE;
728 break;
729 case 1:
730 gcc_assert (sign_p == UNSIGNED);
731 for (i = 0; i < nunits; i++)
733 shift_temps[i] = 1;
734 post_shifts[i]--;
736 break;
737 case 2:
738 case 3:
739 case 4:
740 case 5:
741 gcc_assert (sign_p == SIGNED);
742 for (i = 0; i < nunits; i++)
743 shift_temps[i] = prec - 1;
744 break;
745 default:
746 return NULL_TREE;
749 for (i = 0; i < nunits; i++)
750 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
751 mulcst = build_vector (type, vec);
753 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
755 switch (mode)
757 case 0:
758 /* t1 = oprnd0 >> pre_shift;
759 t2 = t1 h* ml;
760 q = t2 >> post_shift; */
761 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
762 break;
763 case 1:
764 /* t1 = oprnd0 h* ml;
765 t2 = oprnd0 - t1;
766 t3 = t2 >> 1;
767 t4 = t1 + t3;
768 q = t4 >> (post_shift - 1); */
769 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
770 if (op == unknown_optab
771 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
772 return NULL_TREE;
773 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
774 tem = add_rshift (gsi, type, tem, shift_temps);
775 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
776 if (op == unknown_optab
777 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
778 return NULL_TREE;
779 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
780 cur_op = add_rshift (gsi, type, tem, post_shifts);
781 if (cur_op == NULL_TREE)
782 return NULL_TREE;
783 break;
784 case 2:
785 case 3:
786 case 4:
787 case 5:
788 /* t1 = oprnd0 h* ml;
789 t2 = t1; [ iff (mode & 2) != 0 ]
790 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
791 t3 = t2 >> post_shift;
792 t4 = oprnd0 >> (prec - 1);
793 q = t3 - t4; [ iff (mode & 1) == 0 ]
794 q = t4 - t3; [ iff (mode & 1) != 0 ] */
795 if ((mode & 2) == 0)
797 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
798 if (op == unknown_optab
799 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
800 return NULL_TREE;
801 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
803 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
804 if (cur_op == NULL_TREE)
805 return NULL_TREE;
806 tem = add_rshift (gsi, type, op0, shift_temps);
807 if (tem == NULL_TREE)
808 return NULL_TREE;
809 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
810 if (op == unknown_optab
811 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
812 return NULL_TREE;
813 if ((mode & 1) == 0)
814 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
815 else
816 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
817 break;
818 default:
819 gcc_unreachable ();
822 if (code == TRUNC_DIV_EXPR)
823 return cur_op;
825 /* We divided. Now finish by:
826 t1 = q * oprnd1;
827 r = oprnd0 - t1; */
828 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
829 if (op == unknown_optab
830 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
831 return NULL_TREE;
832 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
833 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
834 if (op == unknown_optab
835 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
836 return NULL_TREE;
837 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
840 /* Expand a vector condition to scalars, by using many conditions
841 on the vector's elements. */
842 static void
843 expand_vector_condition (gimple_stmt_iterator *gsi)
845 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
846 tree type = gimple_expr_type (stmt);
847 tree a = gimple_assign_rhs1 (stmt);
848 tree a1 = a;
849 tree a2;
850 bool a_is_comparison = false;
851 tree b = gimple_assign_rhs2 (stmt);
852 tree c = gimple_assign_rhs3 (stmt);
853 vec<constructor_elt, va_gc> *v;
854 tree constr;
855 tree inner_type = TREE_TYPE (type);
856 tree cond_type = TREE_TYPE (TREE_TYPE (a));
857 tree comp_inner_type = cond_type;
858 tree width = TYPE_SIZE (inner_type);
859 tree index = bitsize_int (0);
860 int nunits = TYPE_VECTOR_SUBPARTS (type);
861 int i;
862 location_t loc = gimple_location (gsi_stmt (*gsi));
864 if (!is_gimple_val (a))
866 gcc_assert (COMPARISON_CLASS_P (a));
867 a_is_comparison = true;
868 a1 = TREE_OPERAND (a, 0);
869 a2 = TREE_OPERAND (a, 1);
870 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
873 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
874 return;
876 /* TODO: try and find a smaller vector type. */
878 warning_at (loc, OPT_Wvector_operation_performance,
879 "vector condition will be expanded piecewise");
881 vec_alloc (v, nunits);
882 for (i = 0; i < nunits;
883 i++, index = int_const_binop (PLUS_EXPR, index, width))
885 tree aa, result;
886 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
887 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
888 if (a_is_comparison)
890 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
891 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
892 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
894 else
895 aa = tree_vec_extract (gsi, cond_type, a, width, index);
896 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
897 constructor_elt ce = {NULL_TREE, result};
898 v->quick_push (ce);
901 constr = build_constructor (type, v);
902 gimple_assign_set_rhs_from_tree (gsi, constr);
903 update_stmt (gsi_stmt (*gsi));
906 static tree
907 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
908 gassign *assign, enum tree_code code)
910 machine_mode compute_mode = TYPE_MODE (compute_type);
912 /* If the compute mode is not a vector mode (hence we are not decomposing
913 a BLKmode vector to smaller, hardware-supported vectors), we may want
914 to expand the operations in parallel. */
915 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
916 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
917 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
918 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
919 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
920 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
921 switch (code)
923 case PLUS_EXPR:
924 case MINUS_EXPR:
925 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
926 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
927 gimple_assign_rhs1 (assign),
928 gimple_assign_rhs2 (assign), code);
929 break;
931 case NEGATE_EXPR:
932 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
933 return expand_vector_addition (gsi, do_unop, do_negate, type,
934 gimple_assign_rhs1 (assign),
935 NULL_TREE, code);
936 break;
938 case BIT_AND_EXPR:
939 case BIT_IOR_EXPR:
940 case BIT_XOR_EXPR:
941 return expand_vector_parallel (gsi, do_binop, type,
942 gimple_assign_rhs1 (assign),
943 gimple_assign_rhs2 (assign), code);
945 case BIT_NOT_EXPR:
946 return expand_vector_parallel (gsi, do_unop, type,
947 gimple_assign_rhs1 (assign),
948 NULL_TREE, code);
949 case EQ_EXPR:
950 case NE_EXPR:
951 case GT_EXPR:
952 case LT_EXPR:
953 case GE_EXPR:
954 case LE_EXPR:
955 case UNEQ_EXPR:
956 case UNGT_EXPR:
957 case UNLT_EXPR:
958 case UNGE_EXPR:
959 case UNLE_EXPR:
960 case LTGT_EXPR:
961 case ORDERED_EXPR:
962 case UNORDERED_EXPR:
964 tree rhs1 = gimple_assign_rhs1 (assign);
965 tree rhs2 = gimple_assign_rhs2 (assign);
967 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
970 case TRUNC_DIV_EXPR:
971 case TRUNC_MOD_EXPR:
973 tree rhs1 = gimple_assign_rhs1 (assign);
974 tree rhs2 = gimple_assign_rhs2 (assign);
975 tree ret;
977 if (!optimize
978 || !VECTOR_INTEGER_TYPE_P (type)
979 || TREE_CODE (rhs2) != VECTOR_CST
980 || !VECTOR_MODE_P (TYPE_MODE (type)))
981 break;
983 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
984 if (ret != NULL_TREE)
985 return ret;
986 break;
989 default:
990 break;
993 if (TREE_CODE_CLASS (code) == tcc_unary)
994 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
995 gimple_assign_rhs1 (assign),
996 NULL_TREE, code);
997 else
998 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
999 gimple_assign_rhs1 (assign),
1000 gimple_assign_rhs2 (assign), code);
1003 /* Try to optimize
1004 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1005 style stmts into:
1006 _9 = { b_7, b_7, b_7, b_7 };
1007 a_5 = _9 + { 0, 3, 6, 9 };
1008 because a vector splat operation is usually more efficient
1009 than piecewise initialization of the vector. */
1011 static void
1012 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1014 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1015 tree lhs = gimple_assign_lhs (stmt);
1016 tree rhs = gimple_assign_rhs1 (stmt);
1017 tree type = TREE_TYPE (rhs);
1018 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1019 bool all_same = true;
1020 constructor_elt *elt;
1021 tree *cst;
1022 gimple g;
1023 tree base = NULL_TREE;
1024 optab op;
1026 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1027 return;
1028 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1029 if (op == unknown_optab
1030 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1031 return;
1032 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1033 if (TREE_CODE (elt->value) != SSA_NAME
1034 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1035 return;
1036 else
1038 tree this_base = elt->value;
1039 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1040 all_same = false;
1041 for (j = 0; j < nelts + 1; j++)
1043 g = SSA_NAME_DEF_STMT (this_base);
1044 if (is_gimple_assign (g)
1045 && gimple_assign_rhs_code (g) == PLUS_EXPR
1046 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1047 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1048 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1049 this_base = gimple_assign_rhs1 (g);
1050 else
1051 break;
1053 if (i == 0)
1054 base = this_base;
1055 else if (this_base != base)
1056 return;
1058 if (all_same)
1059 return;
1060 cst = XALLOCAVEC (tree, nelts);
1061 for (i = 0; i < nelts; i++)
1063 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1064 cst[i] = build_zero_cst (TREE_TYPE (base));
1065 while (this_base != base)
1067 g = SSA_NAME_DEF_STMT (this_base);
1068 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1069 cst[i], gimple_assign_rhs2 (g));
1070 if (cst[i] == NULL_TREE
1071 || TREE_CODE (cst[i]) != INTEGER_CST
1072 || TREE_OVERFLOW (cst[i]))
1073 return;
1074 this_base = gimple_assign_rhs1 (g);
1077 for (i = 0; i < nelts; i++)
1078 CONSTRUCTOR_ELT (rhs, i)->value = base;
1079 g = gimple_build_assign (make_ssa_name (type), rhs);
1080 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1081 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1082 build_vector (type, cst));
1083 gsi_replace (gsi, g, false);
1086 /* Return a type for the widest vector mode whose components are of type
1087 TYPE, or NULL_TREE if none is found. */
1089 static tree
1090 type_for_widest_vector_mode (tree type, optab op)
1092 machine_mode inner_mode = TYPE_MODE (type);
1093 machine_mode best_mode = VOIDmode, mode;
1094 int best_nunits = 0;
1096 if (SCALAR_FLOAT_MODE_P (inner_mode))
1097 mode = MIN_MODE_VECTOR_FLOAT;
1098 else if (SCALAR_FRACT_MODE_P (inner_mode))
1099 mode = MIN_MODE_VECTOR_FRACT;
1100 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1101 mode = MIN_MODE_VECTOR_UFRACT;
1102 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1103 mode = MIN_MODE_VECTOR_ACCUM;
1104 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1105 mode = MIN_MODE_VECTOR_UACCUM;
1106 else
1107 mode = MIN_MODE_VECTOR_INT;
1109 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1110 if (GET_MODE_INNER (mode) == inner_mode
1111 && GET_MODE_NUNITS (mode) > best_nunits
1112 && optab_handler (op, mode) != CODE_FOR_nothing)
1113 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1115 if (best_mode == VOIDmode)
1116 return NULL_TREE;
1117 else
1118 return build_vector_type_for_mode (type, best_mode);
1122 /* Build a reference to the element of the vector VECT.  The function
1123 returns either the element itself, a BIT_FIELD_REF, or an
1124 ARRAY_REF expression.
1126 GSI is required to insert temporary variables while building a
1127 reference to the element of the vector VECT.
1129 PTMPVEC is a pointer to a temporary variable used for caching
1130 purposes.  If PTMPVEC is NULL, a new temporary variable
1131 will be created. */
1132 static tree
1133 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1135 tree vect_type, vect_elt_type;
1136 gimple asgn;
1137 tree tmpvec;
1138 tree arraytype;
1139 bool need_asgn = true;
1140 unsigned int elements;
1142 vect_type = TREE_TYPE (vect);
1143 vect_elt_type = TREE_TYPE (vect_type);
1144 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1146 if (TREE_CODE (idx) == INTEGER_CST)
1148 unsigned HOST_WIDE_INT index;
1150 /* Given that we're about to compute a binary modulus,
1151 we don't care about the high bits of the value. */
1152 index = TREE_INT_CST_LOW (idx);
1153 if (!tree_fits_uhwi_p (idx) || index >= elements)
1155 index &= elements - 1;
1156 idx = build_int_cst (TREE_TYPE (idx), index);
1159 /* When lowering a vector statement sequence do some easy
1160 simplification by looking through intermediate vector results. */
1161 if (TREE_CODE (vect) == SSA_NAME)
1163 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1164 if (is_gimple_assign (def_stmt)
1165 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1166 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1167 vect = gimple_assign_rhs1 (def_stmt);
1170 if (TREE_CODE (vect) == VECTOR_CST)
1171 return VECTOR_CST_ELT (vect, index);
1172 else if (TREE_CODE (vect) == CONSTRUCTOR
1173 && (CONSTRUCTOR_NELTS (vect) == 0
1174 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1175 != VECTOR_TYPE))
1177 if (index < CONSTRUCTOR_NELTS (vect))
1178 return CONSTRUCTOR_ELT (vect, index)->value;
1179 return build_zero_cst (vect_elt_type);
1181 else
1183 tree size = TYPE_SIZE (vect_elt_type);
1184 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1185 size);
1186 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1190 if (!ptmpvec)
1191 tmpvec = create_tmp_var (vect_type, "vectmp");
1192 else if (!*ptmpvec)
1193 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1194 else
1196 tmpvec = *ptmpvec;
1197 need_asgn = false;
1200 if (need_asgn)
1202 TREE_ADDRESSABLE (tmpvec) = 1;
1203 asgn = gimple_build_assign (tmpvec, vect);
1204 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1207 arraytype = build_array_type_nelts (vect_elt_type, elements);
1208 return build4 (ARRAY_REF, vect_elt_type,
1209 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1210 idx, NULL_TREE, NULL_TREE);
1213 /* Check if VEC_PERM_EXPR within the given setting is supported
1214 by hardware, or lower it piecewise.
1216 When VEC_PERM_EXPR has the same first and second operands:
1217 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1218 {v0[mask[0]], v0[mask[1]], ...}
1219 MASK and V0 must have the same number of elements.
1221 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1222 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1223 V0 and V1 must have the same type. MASK, V0, V1 must have the
1224 same number of elements. */
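/* Editor's example, not part of the original source:
   VEC_PERM_EXPR <{1,2,3,4}, {5,6,7,8}, {0,4,1,5}> selects {1,5,2,6};
   mask values below 4 index the first vector and the remaining values,
   minus 4, index the second.  */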
1226 static void
1227 lower_vec_perm (gimple_stmt_iterator *gsi)
1229 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1230 tree mask = gimple_assign_rhs3 (stmt);
1231 tree vec0 = gimple_assign_rhs1 (stmt);
1232 tree vec1 = gimple_assign_rhs2 (stmt);
1233 tree vect_type = TREE_TYPE (vec0);
1234 tree mask_type = TREE_TYPE (mask);
1235 tree vect_elt_type = TREE_TYPE (vect_type);
1236 tree mask_elt_type = TREE_TYPE (mask_type);
1237 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1238 vec<constructor_elt, va_gc> *v;
1239 tree constr, t, si, i_val;
1240 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1241 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1242 location_t loc = gimple_location (gsi_stmt (*gsi));
1243 unsigned i;
1245 if (TREE_CODE (mask) == SSA_NAME)
1247 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1248 if (is_gimple_assign (def_stmt)
1249 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1250 mask = gimple_assign_rhs1 (def_stmt);
1253 if (TREE_CODE (mask) == VECTOR_CST)
1255 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1257 for (i = 0; i < elements; ++i)
1258 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1259 & (2 * elements - 1));
1261 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1263 gimple_assign_set_rhs3 (stmt, mask);
1264 update_stmt (stmt);
1265 return;
1268 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1269 return;
1271 warning_at (loc, OPT_Wvector_operation_performance,
1272 "vector shuffling operation will be expanded piecewise");
1274 vec_alloc (v, elements);
1275 for (i = 0; i < elements; i++)
1277 si = size_int (i);
1278 i_val = vector_element (gsi, mask, si, &masktmp);
1280 if (TREE_CODE (i_val) == INTEGER_CST)
1282 unsigned HOST_WIDE_INT index;
1284 index = TREE_INT_CST_LOW (i_val);
1285 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1286 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1288 if (two_operand_p && (index & elements) != 0)
1289 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1290 else
1291 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1293 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1294 true, GSI_SAME_STMT);
1296 else
1298 tree cond = NULL_TREE, v0_val;
1300 if (two_operand_p)
1302 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1303 build_int_cst (mask_elt_type, elements));
1304 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1305 true, GSI_SAME_STMT);
1308 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1309 build_int_cst (mask_elt_type, elements - 1));
1310 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1311 true, GSI_SAME_STMT);
1313 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1314 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1315 true, GSI_SAME_STMT);
1317 if (two_operand_p)
1319 tree v1_val;
1321 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1322 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1323 true, GSI_SAME_STMT);
1325 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1326 cond, build_zero_cst (mask_elt_type));
1327 cond = fold_build3 (COND_EXPR, vect_elt_type,
1328 cond, v0_val, v1_val);
1329 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1330 true, GSI_SAME_STMT);
1332 else
1333 t = v0_val;
1336 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1339 constr = build_constructor (vect_type, v);
1340 gimple_assign_set_rhs_from_tree (gsi, constr);
1341 update_stmt (gsi_stmt (*gsi));
1344 /* Return the type in which operation CODE with optab OP can be
1345 computed. */
1347 static tree
1348 get_compute_type (enum tree_code code, optab op, tree type)
1350 /* For very wide vectors, try using a smaller vector mode. */
1351 tree compute_type = type;
1352 if (op
1353 && (!VECTOR_MODE_P (TYPE_MODE (type))
1354 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1356 tree vector_compute_type
1357 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1358 if (vector_compute_type != NULL_TREE
1359 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1360 < TYPE_VECTOR_SUBPARTS (compute_type))
1361 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1362 != CODE_FOR_nothing))
1363 compute_type = vector_compute_type;
1366 /* If we are breaking a BLKmode vector into smaller pieces,
1367 type_for_widest_vector_mode has already looked into the optab,
1368 so skip these checks. */
1369 if (compute_type == type)
1371 machine_mode compute_mode = TYPE_MODE (compute_type);
1372 if (VECTOR_MODE_P (compute_mode))
1374 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1375 return compute_type;
1376 if (code == MULT_HIGHPART_EXPR
1377 && can_mult_highpart_p (compute_mode,
1378 TYPE_UNSIGNED (compute_type)))
1379 return compute_type;
1381 /* There is no operation in hardware, so fall back to scalars. */
1382 compute_type = TREE_TYPE (type);
1385 return compute_type;
1388 /* Helper function of expand_vector_operations_1. Return number of
1389 vector elements for vector types or 1 for other types. */
1391 static inline int
1392 count_type_subparts (tree type)
1394 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1397 static tree
1398 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1399 tree bitpos, tree bitsize, enum tree_code code)
1401 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1402 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1403 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1404 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1405 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1406 return gimplify_build3 (gsi, code, inner_type, cond, a, b);
1409 /* Expand a vector COND_EXPR to scalars, piecewise. */
1410 static void
1411 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1413 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1414 tree type = gimple_expr_type (stmt);
1415 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1416 machine_mode compute_mode = TYPE_MODE (compute_type);
1417 gcc_assert (compute_mode != BLKmode);
1418 tree lhs = gimple_assign_lhs (stmt);
1419 tree rhs2 = gimple_assign_rhs2 (stmt);
1420 tree rhs3 = gimple_assign_rhs3 (stmt);
1421 tree new_rhs;
1423 /* If the compute mode is not a vector mode (hence we are not decomposing
1424 a BLKmode vector to smaller, hardware-supported vectors), we may want
1425 to expand the operations in parallel. */
1426 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1427 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1428 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1429 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1430 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1431 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1432 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1433 COND_EXPR);
1434 else
1435 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1436 rhs2, rhs3, COND_EXPR);
1437 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1438 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1439 new_rhs);
1441 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1442 way to do it is to change expand_vector_operation and its callees to
1443 return a tree_code, RHS1 and RHS2 instead of a tree. */
1444 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1445 update_stmt (gsi_stmt (*gsi));
1448 /* Process one statement. If we identify a vector operation, expand it. */
1450 static void
1451 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1453 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1454 enum tree_code code;
1455 optab op = unknown_optab;
1456 enum gimple_rhs_class rhs_class;
1457 tree new_rhs;
1459 /* Only consider code == GIMPLE_ASSIGN. */
1460 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1461 if (!stmt)
1462 return;
1464 code = gimple_assign_rhs_code (stmt);
1465 rhs_class = get_gimple_rhs_class (code);
1466 lhs = gimple_assign_lhs (stmt);
1468 if (code == VEC_PERM_EXPR)
1470 lower_vec_perm (gsi);
1471 return;
1474 if (code == VEC_COND_EXPR)
1476 expand_vector_condition (gsi);
1477 return;
1480 if (code == COND_EXPR
1481 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1482 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1484 expand_vector_scalar_condition (gsi);
1485 return;
1488 if (code == CONSTRUCTOR
1489 && TREE_CODE (lhs) == SSA_NAME
1490 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1491 && !gimple_clobber_p (stmt)
1492 && optimize)
1494 optimize_vector_constructor (gsi);
1495 return;
1498 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1499 return;
1501 rhs1 = gimple_assign_rhs1 (stmt);
1502 type = gimple_expr_type (stmt);
1503 if (rhs_class == GIMPLE_BINARY_RHS)
1504 rhs2 = gimple_assign_rhs2 (stmt);
1506 if (TREE_CODE (type) != VECTOR_TYPE)
1507 return;
1509 if (CONVERT_EXPR_CODE_P (code)
1510 || code == FLOAT_EXPR
1511 || code == FIX_TRUNC_EXPR
1512 || code == VIEW_CONVERT_EXPR)
1513 return;
1515 /* The signedness is determined from input argument. */
1516 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1517 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1518 type = TREE_TYPE (rhs1);
1520 /* For widening/narrowing vector operations, the relevant type is that of
1521 the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
1522 handled in the same way above. */
1523 if (code == WIDEN_SUM_EXPR
1524 || code == VEC_WIDEN_MULT_HI_EXPR
1525 || code == VEC_WIDEN_MULT_LO_EXPR
1526 || code == VEC_WIDEN_MULT_EVEN_EXPR
1527 || code == VEC_WIDEN_MULT_ODD_EXPR
1528 || code == VEC_UNPACK_HI_EXPR
1529 || code == VEC_UNPACK_LO_EXPR
1530 || code == VEC_PACK_TRUNC_EXPR
1531 || code == VEC_PACK_SAT_EXPR
1532 || code == VEC_PACK_FIX_TRUNC_EXPR
1533 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1534 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1535 type = TREE_TYPE (rhs1);
1537 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1538 scalar. */
1539 if (code == LSHIFT_EXPR
1540 || code == RSHIFT_EXPR
1541 || code == LROTATE_EXPR
1542 || code == RROTATE_EXPR)
1544 optab opv;
1546 /* Check whether we have vector <op> {x,x,x,x} where x
1547 could be a scalar variable or a constant. Transform
1548 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1549 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1551 tree first;
1552 gimple def_stmt;
1554 if ((TREE_CODE (rhs2) == VECTOR_CST
1555 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1556 || (TREE_CODE (rhs2) == SSA_NAME
1557 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1558 && gimple_assign_single_p (def_stmt)
1559 && (first = uniform_vector_p
1560 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1562 gimple_assign_set_rhs2 (stmt, first);
1563 update_stmt (stmt);
1564 rhs2 = first;
1568 opv = optab_for_tree_code (code, type, optab_vector);
1569 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1570 op = opv;
1571 else
1573 op = optab_for_tree_code (code, type, optab_scalar);
1575 compute_type = get_compute_type (code, op, type);
1576 if (compute_type == type)
1577 return;
1578 /* The rtl expander will expand vector/scalar as vector/vector
1579 if necessary. Pick one with wider vector type. */
1580 tree compute_vtype = get_compute_type (code, opv, type);
1581 if (count_type_subparts (compute_vtype)
1582 > count_type_subparts (compute_type))
1584 compute_type = compute_vtype;
1585 op = opv;
1589 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1591 if (compute_type == NULL_TREE)
1592 compute_type = get_compute_type (code, op, type);
1593 if (compute_type == type)
1594 return;
1595 /* Before splitting vector rotates into scalar rotates,
1596 see if we can't use vector shifts and BIT_IOR_EXPR
1597 instead. For vector by vector rotates we'd also
1598 need to check BIT_AND_EXPR and NEGATE_EXPR; punt there
1599 for now, as fold doesn't seem to create such rotates anyway. */
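/* Editor's note: the identity used is
   x rrotate N == (x >> N) | (x << (PREC - N)) on each element (for
   0 < N < PREC, with an unsigned right shift), so a vector rotate can be
   assembled from two vector shifts and a BIT_IOR_EXPR when the target
   has no vector rotate.  */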
1600 if (compute_type == TREE_TYPE (type)
1601 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1603 optab oplv = vashl_optab, opl = ashl_optab;
1604 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1605 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1606 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1607 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1608 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1609 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1610 /* The rtl expander will expand vector/scalar as vector/vector
1611 if necessary. Pick one with wider vector type. */
1612 if (count_type_subparts (compute_lvtype)
1613 > count_type_subparts (compute_ltype))
1615 compute_ltype = compute_lvtype;
1616 opl = oplv;
1618 if (count_type_subparts (compute_rvtype)
1619 > count_type_subparts (compute_rtype))
1621 compute_rtype = compute_rvtype;
1622 opr = oprv;
1624 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1625 BIT_IOR_EXPR. */
1626 compute_type = compute_ltype;
1627 if (count_type_subparts (compute_type)
1628 > count_type_subparts (compute_rtype))
1629 compute_type = compute_rtype;
1630 if (count_type_subparts (compute_type)
1631 > count_type_subparts (compute_otype))
1632 compute_type = compute_otype;
1633 /* Verify all 3 operations can be performed in that type. */
1634 if (compute_type != TREE_TYPE (type))
1636 if (optab_handler (opl, TYPE_MODE (compute_type))
1637 == CODE_FOR_nothing
1638 || optab_handler (opr, TYPE_MODE (compute_type))
1639 == CODE_FOR_nothing
1640 || optab_handler (opo, TYPE_MODE (compute_type))
1641 == CODE_FOR_nothing)
1642 compute_type = TREE_TYPE (type);
1647 else
1648 op = optab_for_tree_code (code, type, optab_default);
1650 /* Optabs will try converting a negation into a subtraction, so
1651 look for it as well. TODO: negation of floating-point vectors
1652 might be turned into an exclusive OR toggling the sign bit. */
1653 if (op == unknown_optab
1654 && code == NEGATE_EXPR
1655 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1656 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1658 if (compute_type == NULL_TREE)
1659 compute_type = get_compute_type (code, op, type);
1660 if (compute_type == type)
1661 return;
1663 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1665 /* Leave expression untouched for later expansion. */
1666 if (new_rhs == NULL_TREE)
1667 return;
1669 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1670 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1671 new_rhs);
1673 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1674 way to do it is to change expand_vector_operation and its callees to
1675 return a tree_code, RHS1 and RHS2 instead of a tree. */
1676 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1677 update_stmt (gsi_stmt (*gsi));
1680 /* Use this to lower vector operations introduced by the vectorizer,
1681 if it may need the bit-twiddling tricks implemented in this file. */
1683 static unsigned int
1684 expand_vector_operations (void)
1686 gimple_stmt_iterator gsi;
1687 basic_block bb;
1688 bool cfg_changed = false;
1690 FOR_EACH_BB_FN (bb, cfun)
1692 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1694 expand_vector_operations_1 (&gsi);
1695 /* ??? If we do not clean up EH then we will ICE in
1696 verification. But in reality we have created wrong-code
1697 as we did not properly transition EH info and edges to
1698 the piecewise computations. */
1699 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1700 && gimple_purge_dead_eh_edges (bb))
1701 cfg_changed = true;
1705 return cfg_changed ? TODO_cleanup_cfg : 0;
1708 namespace {
1710 const pass_data pass_data_lower_vector =
1712 GIMPLE_PASS, /* type */
1713 "veclower", /* name */
1714 OPTGROUP_VEC, /* optinfo_flags */
1715 TV_NONE, /* tv_id */
1716 PROP_cfg, /* properties_required */
1717 PROP_gimple_lvec, /* properties_provided */
1718 0, /* properties_destroyed */
1719 0, /* todo_flags_start */
1720 TODO_update_ssa, /* todo_flags_finish */
1723 class pass_lower_vector : public gimple_opt_pass
1725 public:
1726 pass_lower_vector (gcc::context *ctxt)
1727 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1730 /* opt_pass methods: */
1731 virtual bool gate (function *fun)
1733 return !(fun->curr_properties & PROP_gimple_lvec);
1736 virtual unsigned int execute (function *)
1738 return expand_vector_operations ();
1741 }; // class pass_lower_vector
1743 } // anon namespace
1745 gimple_opt_pass *
1746 make_pass_lower_vector (gcc::context *ctxt)
1748 return new pass_lower_vector (ctxt);
1751 namespace {
1753 const pass_data pass_data_lower_vector_ssa =
1755 GIMPLE_PASS, /* type */
1756 "veclower2", /* name */
1757 OPTGROUP_VEC, /* optinfo_flags */
1758 TV_NONE, /* tv_id */
1759 PROP_cfg, /* properties_required */
1760 PROP_gimple_lvec, /* properties_provided */
1761 0, /* properties_destroyed */
1762 0, /* todo_flags_start */
1763 ( TODO_update_ssa
1764 | TODO_cleanup_cfg ), /* todo_flags_finish */
1767 class pass_lower_vector_ssa : public gimple_opt_pass
1769 public:
1770 pass_lower_vector_ssa (gcc::context *ctxt)
1771 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1774 /* opt_pass methods: */
1775 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1776 virtual unsigned int execute (function *)
1778 return expand_vector_operations ();
1781 }; // class pass_lower_vector_ssa
1783 } // anon namespace
1785 gimple_opt_pass *
1786 make_pass_lower_vector_ssa (gcc::context *ctxt)
1788 return new pass_lower_vector_ssa (ctxt);
1791 #include "gt-tree-vect-generic.h"