re PR bootstrap/54281 (Fails to bootstrap with --disable-nls)
[official-gcc.git] / gcc / tree-vect-generic.c
blob5b583124cab2c0a6d34670626f9536f99fe00aea
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
28 #include "gimple.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
31 #include "flags.h"
32 #include "ggc.h"
33 #include "diagnostic.h"
34 #include "target.h"
36 /* Need to include rtl.h, expr.h, etc. for optabs. */
37 #include "expr.h"
38 #include "optabs.h"
41 static void expand_vector_operations_1 (gimple_stmt_iterator *);
44 /* Build a constant of type TYPE, made of VALUE's bits replicated
45 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
46 static tree
47 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
49 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
50 int n = HOST_BITS_PER_WIDE_INT / width;
51 unsigned HOST_WIDE_INT low, high, mask;
52 tree ret;
54 gcc_assert (n);
56 if (width == HOST_BITS_PER_WIDE_INT)
57 low = value;
58 else
60 mask = ((HOST_WIDE_INT)1 << width) - 1;
61 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
64 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
65 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
66 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
67 high = 0;
68 else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
69 high = low;
70 else
71 gcc_unreachable ();
73 ret = build_int_cst_wide (type, low, high);
74 return ret;
77 static GTY(()) tree vector_inner_type;
78 static GTY(()) tree vector_last_type;
79 static GTY(()) int vector_last_nunits;
81 /* Return a suitable vector types made of SUBPARTS units each of mode
82 "word_mode" (the global variable). */
83 static tree
84 build_word_mode_vector_type (int nunits)
86 if (!vector_inner_type)
87 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
88 else if (vector_last_nunits == nunits)
90 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
91 return vector_last_type;
94 /* We build a new type, but we canonicalize it nevertheless,
95 because it still saves some memory. */
96 vector_last_nunits = nunits;
97 vector_last_type = type_hash_canon (nunits,
98 build_vector_type (vector_inner_type,
99 nunits));
100 return vector_last_type;
103 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
104 tree, tree, tree, tree, tree, enum tree_code);
106 static inline tree
107 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
108 tree t, tree bitsize, tree bitpos)
110 if (bitpos)
111 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
112 else
113 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
116 static tree
117 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
118 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
119 enum tree_code code)
121 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
122 return gimplify_build1 (gsi, code, inner_type, a);
125 static tree
126 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
127 tree bitpos, tree bitsize, enum tree_code code)
129 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
130 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
131 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
132 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
133 return gimplify_build2 (gsi, code, inner_type, a, b);
136 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
138 INNER_TYPE is the type of A and B elements
140 returned expression is of signed integer type with the
141 size equal to the size of INNER_TYPE. */
142 static tree
143 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
144 tree bitpos, tree bitsize, enum tree_code code)
146 tree comp_type;
148 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
149 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
151 comp_type = build_nonstandard_integer_type
152 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
154 return gimplify_build3 (gsi, COND_EXPR, comp_type,
155 fold_build2 (code, boolean_type_node, a, b),
156 build_int_cst (comp_type, -1),
157 build_int_cst (comp_type, 0));
160 /* Expand vector addition to scalars. This does bit twiddling
161 in order to increase parallelism:
163 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
164 (a ^ b) & 0x80808080
166 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
167 (a ^ ~b) & 0x80808080
169 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
171 This optimization should be done only if 4 vector items or more
172 fit into a word. */
173 static tree
174 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
175 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
176 enum tree_code code)
178 tree inner_type = TREE_TYPE (TREE_TYPE (a));
179 unsigned HOST_WIDE_INT max;
180 tree low_bits, high_bits, a_low, b_low, result_low, signs;
182 max = GET_MODE_MASK (TYPE_MODE (inner_type));
183 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
184 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
186 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
187 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
189 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
190 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
191 if (code == PLUS_EXPR)
192 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
193 else
195 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
196 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
199 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
200 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
201 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
204 static tree
205 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
206 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
207 tree bitsize ATTRIBUTE_UNUSED,
208 enum tree_code code ATTRIBUTE_UNUSED)
210 tree inner_type = TREE_TYPE (TREE_TYPE (b));
211 HOST_WIDE_INT max;
212 tree low_bits, high_bits, b_low, result_low, signs;
214 max = GET_MODE_MASK (TYPE_MODE (inner_type));
215 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
216 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
218 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
220 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
221 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
222 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
223 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
224 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
227 /* Expand a vector operation to scalars, by using many operations
228 whose type is the vector type's inner type. */
229 static tree
230 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
231 tree type, tree inner_type,
232 tree a, tree b, enum tree_code code)
234 VEC(constructor_elt,gc) *v;
235 tree part_width = TYPE_SIZE (inner_type);
236 tree index = bitsize_int (0);
237 int nunits = TYPE_VECTOR_SUBPARTS (type);
238 int delta = tree_low_cst (part_width, 1)
239 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
240 int i;
241 location_t loc = gimple_location (gsi_stmt (*gsi));
243 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
244 warning_at (loc, OPT_Wvector_operation_performance,
245 "vector operation will be expanded piecewise");
246 else
247 warning_at (loc, OPT_Wvector_operation_performance,
248 "vector operation will be expanded in parallel");
250 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
251 for (i = 0; i < nunits;
252 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
254 tree result = f (gsi, inner_type, a, b, index, part_width, code);
255 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
256 ce->index = NULL_TREE;
257 ce->value = result;
260 return build_constructor (type, v);
263 /* Expand a vector operation to scalars with the freedom to use
264 a scalar integer type, or to use a different size for the items
265 in the vector type. */
266 static tree
267 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
268 tree a, tree b,
269 enum tree_code code)
271 tree result, compute_type;
272 enum machine_mode mode;
273 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
274 location_t loc = gimple_location (gsi_stmt (*gsi));
276 /* We have three strategies. If the type is already correct, just do
277 the operation an element at a time. Else, if the vector is wider than
278 one word, do it a word at a time; finally, if the vector is smaller
279 than one word, do it as a scalar. */
280 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
281 return expand_vector_piecewise (gsi, f,
282 type, TREE_TYPE (type),
283 a, b, code);
284 else if (n_words > 1)
286 tree word_type = build_word_mode_vector_type (n_words);
287 result = expand_vector_piecewise (gsi, f,
288 word_type, TREE_TYPE (word_type),
289 a, b, code);
290 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
291 GSI_SAME_STMT);
293 else
295 /* Use a single scalar operation with a mode no wider than word_mode. */
296 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
297 compute_type = lang_hooks.types.type_for_mode (mode, 1);
298 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
299 warning_at (loc, OPT_Wvector_operation_performance,
300 "vector operation will be expanded with a "
301 "single scalar operation");
304 return result;
307 /* Expand a vector operation to scalars; for integer types we can use
308 special bit twiddling tricks to do the sums a word at a time, using
309 function F_PARALLEL instead of F. These tricks are done only if
310 they can process at least four items, that is, only if the vector
311 holds at least four items and if a word can hold four items. */
312 static tree
313 expand_vector_addition (gimple_stmt_iterator *gsi,
314 elem_op_func f, elem_op_func f_parallel,
315 tree type, tree a, tree b, enum tree_code code)
317 int parts_per_word = UNITS_PER_WORD
318 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
320 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
321 && parts_per_word >= 4
322 && TYPE_VECTOR_SUBPARTS (type) >= 4)
323 return expand_vector_parallel (gsi, f_parallel,
324 type, a, b, code);
325 else
326 return expand_vector_piecewise (gsi, f,
327 type, TREE_TYPE (type),
328 a, b, code);
331 /* Check if vector VEC consists of all the equal elements and
332 that the number of elements corresponds to the type of VEC.
333 The function returns first element of the vector
334 or NULL_TREE if the vector is not uniform. */
335 static tree
336 uniform_vector_p (tree vec)
338 tree first, t;
339 unsigned i;
341 if (vec == NULL_TREE)
342 return NULL_TREE;
344 if (TREE_CODE (vec) == VECTOR_CST)
346 first = VECTOR_CST_ELT (vec, 0);
347 for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
348 if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
349 return NULL_TREE;
351 return first;
354 else if (TREE_CODE (vec) == CONSTRUCTOR)
356 first = error_mark_node;
358 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
360 if (i == 0)
362 first = t;
363 continue;
365 if (!operand_equal_p (first, t, 0))
366 return NULL_TREE;
368 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
369 return NULL_TREE;
371 return first;
374 return NULL_TREE;
377 /* Try to expand vector comparison expression OP0 CODE OP1 by
378 querying optab if the following expression:
379 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
380 can be expanded. */
381 static tree
382 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
383 tree op1, enum tree_code code)
385 tree t;
386 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
387 t = expand_vector_piecewise (gsi, do_compare, type,
388 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
389 else
390 t = NULL_TREE;
392 return t;
395 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
396 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
397 the result if successful, otherwise return NULL_TREE. */
398 static tree
399 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
401 optab op;
402 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
403 bool scalar_shift = true;
405 for (i = 1; i < nunits; i++)
407 if (shiftcnts[i] != shiftcnts[0])
408 scalar_shift = false;
411 if (scalar_shift && shiftcnts[0] == 0)
412 return op0;
414 if (scalar_shift)
416 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
417 if (op != unknown_optab
418 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
419 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
420 build_int_cst (NULL_TREE, shiftcnts[0]));
423 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
424 if (op != unknown_optab
425 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
427 tree *vec = XALLOCAVEC (tree, nunits);
428 for (i = 0; i < nunits; i++)
429 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
430 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
431 build_vector (type, vec));
434 return NULL_TREE;
437 /* Try to expand integer vector division by constant using
438 widening multiply, shifts and additions. */
439 static tree
440 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
441 tree op1, enum tree_code code)
443 bool use_pow2 = true;
444 bool has_vector_shift = true;
445 int mode = -1, this_mode;
446 int pre_shift = -1, post_shift;
447 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
448 int *shifts = XALLOCAVEC (int, nunits * 4);
449 int *pre_shifts = shifts + nunits;
450 int *post_shifts = pre_shifts + nunits;
451 int *shift_temps = post_shifts + nunits;
452 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
453 int prec = TYPE_PRECISION (TREE_TYPE (type));
454 int dummy_int;
455 unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
456 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
457 tree *vec;
458 tree cur_op, mulcst, tem;
459 optab op;
461 if (prec > HOST_BITS_PER_WIDE_INT)
462 return NULL_TREE;
464 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
465 if (op == unknown_optab
466 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
467 has_vector_shift = false;
469 /* Analysis phase. Determine if all op1 elements are either power
470 of two and it is possible to expand it using shifts (or for remainder
471 using masking). Additionally compute the multiplicative constants
472 and pre and post shifts if the division is to be expanded using
473 widening or high part multiplication plus shifts. */
474 for (i = 0; i < nunits; i++)
476 tree cst = VECTOR_CST_ELT (op1, i);
477 unsigned HOST_WIDE_INT ml;
479 if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
480 return NULL_TREE;
481 pre_shifts[i] = 0;
482 post_shifts[i] = 0;
483 mulc[i] = 0;
484 if (use_pow2
485 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
486 use_pow2 = false;
487 if (use_pow2)
489 shifts[i] = tree_log2 (cst);
490 if (shifts[i] != shifts[0]
491 && code == TRUNC_DIV_EXPR
492 && !has_vector_shift)
493 use_pow2 = false;
495 if (mode == -2)
496 continue;
497 if (unsignedp)
499 unsigned HOST_WIDE_INT mh;
500 unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;
502 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
503 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
504 return NULL_TREE;
506 if (d <= 1)
508 mode = -2;
509 continue;
512 /* Find a suitable multiplier and right shift count
513 instead of multiplying with D. */
514 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
516 /* If the suggested multiplier is more than SIZE bits, we can
517 do better for even divisors, using an initial right shift. */
518 if ((mh != 0 && (d & 1) == 0)
519 || (!has_vector_shift && pre_shift != -1))
521 if (has_vector_shift)
522 pre_shift = floor_log2 (d & -d);
523 else if (pre_shift == -1)
525 unsigned int j;
526 for (j = 0; j < nunits; j++)
528 tree cst2 = VECTOR_CST_ELT (op1, j);
529 unsigned HOST_WIDE_INT d2;
530 int this_pre_shift;
532 if (!host_integerp (cst2, 1))
533 return NULL_TREE;
534 d2 = tree_low_cst (cst2, 1) & mask;
535 if (d2 == 0)
536 return NULL_TREE;
537 this_pre_shift = floor_log2 (d2 & -d2);
538 if (pre_shift == -1 || this_pre_shift < pre_shift)
539 pre_shift = this_pre_shift;
541 if (i != 0 && pre_shift != 0)
543 /* Restart. */
544 i = -1U;
545 mode = -1;
546 continue;
549 if (pre_shift != 0)
551 if ((d >> pre_shift) <= 1)
553 mode = -2;
554 continue;
556 mh = choose_multiplier (d >> pre_shift, prec,
557 prec - pre_shift,
558 &ml, &post_shift, &dummy_int);
559 gcc_assert (!mh);
560 pre_shifts[i] = pre_shift;
563 if (!mh)
564 this_mode = 0;
565 else
566 this_mode = 1;
568 else
570 HOST_WIDE_INT d = tree_low_cst (cst, 0);
571 unsigned HOST_WIDE_INT abs_d;
573 if (d == -1)
574 return NULL_TREE;
576 /* Since d might be INT_MIN, we have to cast to
577 unsigned HOST_WIDE_INT before negating to avoid
578 undefined signed overflow. */
579 abs_d = (d >= 0
580 ? (unsigned HOST_WIDE_INT) d
581 : - (unsigned HOST_WIDE_INT) d);
583 /* n rem d = n rem -d */
584 if (code == TRUNC_MOD_EXPR && d < 0)
585 d = abs_d;
586 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
588 /* This case is not handled correctly below. */
589 mode = -2;
590 continue;
592 if (abs_d <= 1)
594 mode = -2;
595 continue;
598 choose_multiplier (abs_d, prec, prec - 1, &ml,
599 &post_shift, &dummy_int);
600 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
602 this_mode = 4 + (d < 0);
603 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
605 else
606 this_mode = 2 + (d < 0);
608 mulc[i] = ml;
609 post_shifts[i] = post_shift;
610 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
611 || post_shift >= prec
612 || pre_shifts[i] >= prec)
613 this_mode = -2;
615 if (i == 0)
616 mode = this_mode;
617 else if (mode != this_mode)
618 mode = -2;
621 vec = XALLOCAVEC (tree, nunits);
623 if (use_pow2)
625 tree addend = NULL_TREE;
626 if (!unsignedp)
628 tree uns_type;
630 /* Both division and remainder sequences need
631 op0 < 0 ? mask : 0 computed. It can be either computed as
632 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
633 if none of the shifts is 0, or as the conditional. */
634 for (i = 0; i < nunits; i++)
635 if (shifts[i] == 0)
636 break;
637 uns_type
638 = build_vector_type (build_nonstandard_integer_type (prec, 1),
639 nunits);
640 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
642 for (i = 0; i < nunits; i++)
643 shift_temps[i] = prec - 1;
644 cur_op = add_rshift (gsi, type, op0, shift_temps);
645 if (cur_op != NULL_TREE)
647 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
648 uns_type, cur_op);
649 for (i = 0; i < nunits; i++)
650 shift_temps[i] = prec - shifts[i];
651 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
652 if (cur_op != NULL_TREE)
653 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
654 type, cur_op);
657 if (addend == NULL_TREE
658 && expand_vec_cond_expr_p (type, type))
660 tree zero, cst, cond;
661 gimple stmt;
663 zero = build_zero_cst (type);
664 cond = build2 (LT_EXPR, type, op0, zero);
665 for (i = 0; i < nunits; i++)
666 vec[i] = build_int_cst (TREE_TYPE (type),
667 ((unsigned HOST_WIDE_INT) 1
668 << shifts[i]) - 1);
669 cst = build_vector (type, vec);
670 addend = make_ssa_name (type, NULL);
671 stmt = gimple_build_assign_with_ops3 (VEC_COND_EXPR, addend,
672 cond, cst, zero);
673 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
676 if (code == TRUNC_DIV_EXPR)
678 if (unsignedp)
680 /* q = op0 >> shift; */
681 cur_op = add_rshift (gsi, type, op0, shifts);
682 if (cur_op != NULL_TREE)
683 return cur_op;
685 else if (addend != NULL_TREE)
687 /* t1 = op0 + addend;
688 q = t1 >> shift; */
689 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
690 if (op != unknown_optab
691 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
693 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
694 cur_op = add_rshift (gsi, type, cur_op, shifts);
695 if (cur_op != NULL_TREE)
696 return cur_op;
700 else
702 tree mask;
703 for (i = 0; i < nunits; i++)
704 vec[i] = build_int_cst (TREE_TYPE (type),
705 ((unsigned HOST_WIDE_INT) 1
706 << shifts[i]) - 1);
707 mask = build_vector (type, vec);
708 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
709 if (op != unknown_optab
710 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
712 if (unsignedp)
713 /* r = op0 & mask; */
714 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
715 else if (addend != NULL_TREE)
717 /* t1 = op0 + addend;
718 t2 = t1 & mask;
719 r = t2 - addend; */
720 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
721 if (op != unknown_optab
722 && optab_handler (op, TYPE_MODE (type))
723 != CODE_FOR_nothing)
725 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
726 addend);
727 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
728 cur_op, mask);
729 op = optab_for_tree_code (MINUS_EXPR, type,
730 optab_default);
731 if (op != unknown_optab
732 && optab_handler (op, TYPE_MODE (type))
733 != CODE_FOR_nothing)
734 return gimplify_build2 (gsi, MINUS_EXPR, type,
735 cur_op, addend);
742 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
743 return NULL_TREE;
745 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
746 return NULL_TREE;
748 cur_op = op0;
750 switch (mode)
752 case 0:
753 gcc_assert (unsignedp);
754 /* t1 = oprnd0 >> pre_shift;
755 t2 = t1 h* ml;
756 q = t2 >> post_shift; */
757 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
758 if (cur_op == NULL_TREE)
759 return NULL_TREE;
760 break;
761 case 1:
762 gcc_assert (unsignedp);
763 for (i = 0; i < nunits; i++)
765 shift_temps[i] = 1;
766 post_shifts[i]--;
768 break;
769 case 2:
770 case 3:
771 case 4:
772 case 5:
773 gcc_assert (!unsignedp);
774 for (i = 0; i < nunits; i++)
775 shift_temps[i] = prec - 1;
776 break;
777 default:
778 return NULL_TREE;
781 for (i = 0; i < nunits; i++)
782 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
783 mulcst = build_vector (type, vec);
785 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
787 switch (mode)
789 case 0:
790 /* t1 = oprnd0 >> pre_shift;
791 t2 = t1 h* ml;
792 q = t2 >> post_shift; */
793 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
794 break;
795 case 1:
796 /* t1 = oprnd0 h* ml;
797 t2 = oprnd0 - t1;
798 t3 = t2 >> 1;
799 t4 = t1 + t3;
800 q = t4 >> (post_shift - 1); */
801 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
802 if (op == unknown_optab
803 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
804 return NULL_TREE;
805 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
806 tem = add_rshift (gsi, type, tem, shift_temps);
807 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
808 if (op == unknown_optab
809 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
810 return NULL_TREE;
811 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
812 cur_op = add_rshift (gsi, type, tem, post_shifts);
813 if (cur_op == NULL_TREE)
814 return NULL_TREE;
815 break;
816 case 2:
817 case 3:
818 case 4:
819 case 5:
820 /* t1 = oprnd0 h* ml;
821 t2 = t1; [ iff (mode & 2) != 0 ]
822 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
823 t3 = t2 >> post_shift;
824 t4 = oprnd0 >> (prec - 1);
825 q = t3 - t4; [ iff (mode & 1) == 0 ]
826 q = t4 - t3; [ iff (mode & 1) != 0 ] */
827 if ((mode & 2) == 0)
829 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
830 if (op == unknown_optab
831 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
832 return NULL_TREE;
833 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
835 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
836 if (cur_op == NULL_TREE)
837 return NULL_TREE;
838 tem = add_rshift (gsi, type, op0, shift_temps);
839 if (tem == NULL_TREE)
840 return NULL_TREE;
841 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
842 if (op == unknown_optab
843 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
844 return NULL_TREE;
845 if ((mode & 1) == 0)
846 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
847 else
848 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
849 break;
850 default:
851 gcc_unreachable ();
854 if (code == TRUNC_DIV_EXPR)
855 return cur_op;
857 /* We divided. Now finish by:
858 t1 = q * oprnd1;
859 r = oprnd0 - t1; */
860 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
861 if (op == unknown_optab
862 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
863 return NULL_TREE;
864 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
865 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
866 if (op == unknown_optab
867 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
868 return NULL_TREE;
869 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
872 static tree
873 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
874 gimple assign, enum tree_code code)
876 enum machine_mode compute_mode = TYPE_MODE (compute_type);
878 /* If the compute mode is not a vector mode (hence we are not decomposing
879 a BLKmode vector to smaller, hardware-supported vectors), we may want
880 to expand the operations in parallel. */
881 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
882 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
883 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
884 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
885 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
886 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
887 switch (code)
889 case PLUS_EXPR:
890 case MINUS_EXPR:
891 if (!TYPE_OVERFLOW_TRAPS (type))
892 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
893 gimple_assign_rhs1 (assign),
894 gimple_assign_rhs2 (assign), code);
895 break;
897 case NEGATE_EXPR:
898 if (!TYPE_OVERFLOW_TRAPS (type))
899 return expand_vector_addition (gsi, do_unop, do_negate, type,
900 gimple_assign_rhs1 (assign),
901 NULL_TREE, code);
902 break;
904 case BIT_AND_EXPR:
905 case BIT_IOR_EXPR:
906 case BIT_XOR_EXPR:
907 return expand_vector_parallel (gsi, do_binop, type,
908 gimple_assign_rhs1 (assign),
909 gimple_assign_rhs2 (assign), code);
911 case BIT_NOT_EXPR:
912 return expand_vector_parallel (gsi, do_unop, type,
913 gimple_assign_rhs1 (assign),
914 NULL_TREE, code);
915 case EQ_EXPR:
916 case NE_EXPR:
917 case GT_EXPR:
918 case LT_EXPR:
919 case GE_EXPR:
920 case LE_EXPR:
921 case UNEQ_EXPR:
922 case UNGT_EXPR:
923 case UNLT_EXPR:
924 case UNGE_EXPR:
925 case UNLE_EXPR:
926 case LTGT_EXPR:
927 case ORDERED_EXPR:
928 case UNORDERED_EXPR:
930 tree rhs1 = gimple_assign_rhs1 (assign);
931 tree rhs2 = gimple_assign_rhs2 (assign);
933 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
936 case TRUNC_DIV_EXPR:
937 case TRUNC_MOD_EXPR:
939 tree rhs1 = gimple_assign_rhs1 (assign);
940 tree rhs2 = gimple_assign_rhs2 (assign);
941 tree ret;
943 if (!optimize
944 || !VECTOR_INTEGER_TYPE_P (type)
945 || TREE_CODE (rhs2) != VECTOR_CST)
946 break;
948 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
949 if (ret != NULL_TREE)
950 return ret;
951 break;
954 default:
955 break;
958 if (TREE_CODE_CLASS (code) == tcc_unary)
959 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
960 gimple_assign_rhs1 (assign),
961 NULL_TREE, code);
962 else
963 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
964 gimple_assign_rhs1 (assign),
965 gimple_assign_rhs2 (assign), code);
968 /* Return a type for the widest vector mode whose components are of type
969 TYPE, or NULL_TREE if none is found. */
971 static tree
972 type_for_widest_vector_mode (tree type, optab op)
974 enum machine_mode inner_mode = TYPE_MODE (type);
975 enum machine_mode best_mode = VOIDmode, mode;
976 int best_nunits = 0;
978 if (SCALAR_FLOAT_MODE_P (inner_mode))
979 mode = MIN_MODE_VECTOR_FLOAT;
980 else if (SCALAR_FRACT_MODE_P (inner_mode))
981 mode = MIN_MODE_VECTOR_FRACT;
982 else if (SCALAR_UFRACT_MODE_P (inner_mode))
983 mode = MIN_MODE_VECTOR_UFRACT;
984 else if (SCALAR_ACCUM_MODE_P (inner_mode))
985 mode = MIN_MODE_VECTOR_ACCUM;
986 else if (SCALAR_UACCUM_MODE_P (inner_mode))
987 mode = MIN_MODE_VECTOR_UACCUM;
988 else
989 mode = MIN_MODE_VECTOR_INT;
991 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
992 if (GET_MODE_INNER (mode) == inner_mode
993 && GET_MODE_NUNITS (mode) > best_nunits
994 && optab_handler (op, mode) != CODE_FOR_nothing)
995 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
997 if (best_mode == VOIDmode)
998 return NULL_TREE;
999 else
1000 return build_vector_type_for_mode (type, best_mode);
1004 /* Build a reference to the element of the vector VECT. Function
1005 returns either the element itself, either BIT_FIELD_REF, or an
1006 ARRAY_REF expression.
1008 GSI is required to insert temporary variables while building a
1009 refernece to the element of the vector VECT.
1011 PTMPVEC is a pointer to the temporary variable for caching
1012 purposes. In case when PTMPVEC is NULL new temporary variable
1013 will be created. */
1014 static tree
1015 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1017 tree vect_type, vect_elt_type;
1018 gimple asgn;
1019 tree tmpvec;
1020 tree arraytype;
1021 bool need_asgn = true;
1022 unsigned int elements;
1024 vect_type = TREE_TYPE (vect);
1025 vect_elt_type = TREE_TYPE (vect_type);
1026 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1028 if (TREE_CODE (idx) == INTEGER_CST)
1030 unsigned HOST_WIDE_INT index;
1032 /* Given that we're about to compute a binary modulus,
1033 we don't care about the high bits of the value. */
1034 index = TREE_INT_CST_LOW (idx);
1035 if (!host_integerp (idx, 1) || index >= elements)
1037 index &= elements - 1;
1038 idx = build_int_cst (TREE_TYPE (idx), index);
1041 /* When lowering a vector statement sequence do some easy
1042 simplification by looking through intermediate vector results. */
1043 if (TREE_CODE (vect) == SSA_NAME)
1045 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1046 if (is_gimple_assign (def_stmt)
1047 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1048 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1049 vect = gimple_assign_rhs1 (def_stmt);
1052 if (TREE_CODE (vect) == VECTOR_CST)
1053 return VECTOR_CST_ELT (vect, index);
1054 else if (TREE_CODE (vect) == CONSTRUCTOR)
1056 unsigned i;
1057 tree elt_i, elt_v;
1059 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
1060 if (operand_equal_p (elt_i, idx, 0))
1061 return elt_v;
1062 return build_zero_cst (vect_elt_type);
1064 else
1066 tree size = TYPE_SIZE (vect_elt_type);
1067 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1068 size);
1069 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1073 if (!ptmpvec)
1074 tmpvec = create_tmp_var (vect_type, "vectmp");
1075 else if (!*ptmpvec)
1076 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1077 else
1079 tmpvec = *ptmpvec;
1080 need_asgn = false;
1083 if (need_asgn)
1085 TREE_ADDRESSABLE (tmpvec) = 1;
1086 asgn = gimple_build_assign (tmpvec, vect);
1087 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1090 arraytype = build_array_type_nelts (vect_elt_type, elements);
1091 return build4 (ARRAY_REF, vect_elt_type,
1092 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1093 idx, NULL_TREE, NULL_TREE);
1096 /* Check if VEC_PERM_EXPR within the given setting is supported
1097 by hardware, or lower it piecewise.
1099 When VEC_PERM_EXPR has the same first and second operands:
1100 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1101 {v0[mask[0]], v0[mask[1]], ...}
1102 MASK and V0 must have the same number of elements.
1104 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1105 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1106 V0 and V1 must have the same type. MASK, V0, V1 must have the
1107 same number of arguments. */
1109 static void
1110 lower_vec_perm (gimple_stmt_iterator *gsi)
1112 gimple stmt = gsi_stmt (*gsi);
1113 tree mask = gimple_assign_rhs3 (stmt);
1114 tree vec0 = gimple_assign_rhs1 (stmt);
1115 tree vec1 = gimple_assign_rhs2 (stmt);
1116 tree vect_type = TREE_TYPE (vec0);
1117 tree mask_type = TREE_TYPE (mask);
1118 tree vect_elt_type = TREE_TYPE (vect_type);
1119 tree mask_elt_type = TREE_TYPE (mask_type);
1120 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1121 VEC(constructor_elt,gc) *v;
1122 tree constr, t, si, i_val;
1123 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1124 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1125 location_t loc = gimple_location (gsi_stmt (*gsi));
1126 unsigned i;
1128 if (TREE_CODE (mask) == SSA_NAME)
1130 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1131 if (is_gimple_assign (def_stmt)
1132 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1133 mask = gimple_assign_rhs1 (def_stmt);
1136 if (TREE_CODE (mask) == VECTOR_CST)
1138 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1140 for (i = 0; i < elements; ++i)
1141 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1142 & (2 * elements - 1));
1144 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1146 gimple_assign_set_rhs3 (stmt, mask);
1147 update_stmt (stmt);
1148 return;
1151 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1152 return;
1154 warning_at (loc, OPT_Wvector_operation_performance,
1155 "vector shuffling operation will be expanded piecewise");
1157 v = VEC_alloc (constructor_elt, gc, elements);
1158 for (i = 0; i < elements; i++)
1160 si = size_int (i);
1161 i_val = vector_element (gsi, mask, si, &masktmp);
1163 if (TREE_CODE (i_val) == INTEGER_CST)
1165 unsigned HOST_WIDE_INT index;
1167 index = TREE_INT_CST_LOW (i_val);
1168 if (!host_integerp (i_val, 1) || index >= elements)
1169 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1171 if (two_operand_p && (index & elements) != 0)
1172 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1173 else
1174 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1176 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1177 true, GSI_SAME_STMT);
1179 else
1181 tree cond = NULL_TREE, v0_val;
1183 if (two_operand_p)
1185 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1186 build_int_cst (mask_elt_type, elements));
1187 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1188 true, GSI_SAME_STMT);
1191 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1192 build_int_cst (mask_elt_type, elements - 1));
1193 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1194 true, GSI_SAME_STMT);
1196 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1197 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1198 true, GSI_SAME_STMT);
1200 if (two_operand_p)
1202 tree v1_val;
1204 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1205 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1206 true, GSI_SAME_STMT);
1208 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1209 cond, build_zero_cst (mask_elt_type));
1210 cond = fold_build3 (COND_EXPR, vect_elt_type,
1211 cond, v0_val, v1_val);
1212 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1213 true, GSI_SAME_STMT);
1215 else
1216 t = v0_val;
1219 CONSTRUCTOR_APPEND_ELT (v, si, t);
1222 constr = build_constructor (vect_type, v);
1223 gimple_assign_set_rhs_from_tree (gsi, constr);
1224 update_stmt (gsi_stmt (*gsi));
1227 /* Process one statement. If we identify a vector operation, expand it. */
1229 static void
1230 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1232 gimple stmt = gsi_stmt (*gsi);
1233 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
1234 enum tree_code code;
1235 enum machine_mode compute_mode;
1236 optab op = unknown_optab;
1237 enum gimple_rhs_class rhs_class;
1238 tree new_rhs;
1240 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1241 return;
1243 code = gimple_assign_rhs_code (stmt);
1244 rhs_class = get_gimple_rhs_class (code);
1245 lhs = gimple_assign_lhs (stmt);
1247 if (code == VEC_PERM_EXPR)
1249 lower_vec_perm (gsi);
1250 return;
1253 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1254 return;
1256 rhs1 = gimple_assign_rhs1 (stmt);
1257 type = gimple_expr_type (stmt);
1258 if (rhs_class == GIMPLE_BINARY_RHS)
1259 rhs2 = gimple_assign_rhs2 (stmt);
1261 if (TREE_CODE (type) != VECTOR_TYPE)
1262 return;
1264 if (code == NOP_EXPR
1265 || code == FLOAT_EXPR
1266 || code == FIX_TRUNC_EXPR
1267 || code == VIEW_CONVERT_EXPR)
1268 return;
1270 gcc_assert (code != CONVERT_EXPR);
1272 /* The signedness is determined from input argument. */
1273 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1274 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1275 type = TREE_TYPE (rhs1);
1277 /* For widening/narrowing vector operations, the relevant type is of the
1278 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1279 calculated in the same way above. */
1280 if (code == WIDEN_SUM_EXPR
1281 || code == VEC_WIDEN_MULT_HI_EXPR
1282 || code == VEC_WIDEN_MULT_LO_EXPR
1283 || code == VEC_WIDEN_MULT_EVEN_EXPR
1284 || code == VEC_WIDEN_MULT_ODD_EXPR
1285 || code == VEC_UNPACK_HI_EXPR
1286 || code == VEC_UNPACK_LO_EXPR
1287 || code == VEC_PACK_TRUNC_EXPR
1288 || code == VEC_PACK_SAT_EXPR
1289 || code == VEC_PACK_FIX_TRUNC_EXPR
1290 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1291 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1292 type = TREE_TYPE (rhs1);
1294 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1295 scalar */
1296 if (code == LSHIFT_EXPR
1297 || code == RSHIFT_EXPR
1298 || code == LROTATE_EXPR
1299 || code == RROTATE_EXPR)
1301 optab opv;
1303 /* Check whether we have vector <op> {x,x,x,x} where x
1304 could be a scalar variable or a constant. Transform
1305 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1306 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1308 tree first;
1309 gimple def_stmt;
1311 if ((TREE_CODE (rhs2) == VECTOR_CST
1312 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1313 || (TREE_CODE (rhs2) == SSA_NAME
1314 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1315 && gimple_assign_single_p (def_stmt)
1316 && (first = uniform_vector_p
1317 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1319 gimple_assign_set_rhs2 (stmt, first);
1320 update_stmt (stmt);
1321 rhs2 = first;
1325 opv = optab_for_tree_code (code, type, optab_vector);
1326 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1327 op = opv;
1328 else
1330 op = optab_for_tree_code (code, type, optab_scalar);
1332 /* The rtl expander will expand vector/scalar as vector/vector
1333 if necessary. Don't bother converting the stmt here. */
1334 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
1335 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
1336 return;
1339 else
1340 op = optab_for_tree_code (code, type, optab_default);
1342 /* Optabs will try converting a negation into a subtraction, so
1343 look for it as well. TODO: negation of floating-point vectors
1344 might be turned into an exclusive OR toggling the sign bit. */
1345 if (op == unknown_optab
1346 && code == NEGATE_EXPR
1347 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1348 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1350 /* For very wide vectors, try using a smaller vector mode. */
1351 compute_type = type;
1352 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
1354 tree vector_compute_type
1355 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1356 if (vector_compute_type != NULL_TREE
1357 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1358 < TYPE_VECTOR_SUBPARTS (compute_type))
1359 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1360 != CODE_FOR_nothing))
1361 compute_type = vector_compute_type;
1364 /* If we are breaking a BLKmode vector into smaller pieces,
1365 type_for_widest_vector_mode has already looked into the optab,
1366 so skip these checks. */
1367 if (compute_type == type)
1369 compute_mode = TYPE_MODE (compute_type);
1370 if (VECTOR_MODE_P (compute_mode))
1372 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1373 return;
1374 if (code == MULT_HIGHPART_EXPR
1375 && can_mult_highpart_p (compute_mode,
1376 TYPE_UNSIGNED (compute_type)))
1377 return;
1379 /* There is no operation in hardware, so fall back to scalars. */
1380 compute_type = TREE_TYPE (type);
1383 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
1384 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1386 /* Leave expression untouched for later expansion. */
1387 if (new_rhs == NULL_TREE)
1388 return;
1390 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1391 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1392 new_rhs);
1394 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1395 way to do it is change expand_vector_operation and its callees to
1396 return a tree_code, RHS1 and RHS2 instead of a tree. */
1397 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1398 update_stmt (gsi_stmt (*gsi));
1401 /* Use this to lower vector operations introduced by the vectorizer,
1402 if it may need the bit-twiddling tricks implemented in this file. */
1404 static bool
1405 gate_expand_vector_operations_ssa (void)
1407 return optimize == 0;
1410 static unsigned int
1411 expand_vector_operations (void)
1413 gimple_stmt_iterator gsi;
1414 basic_block bb;
1415 bool cfg_changed = false;
1417 FOR_EACH_BB (bb)
1419 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1421 expand_vector_operations_1 (&gsi);
1422 /* ??? If we do not cleanup EH then we will ICE in
1423 verification. But in reality we have created wrong-code
1424 as we did not properly transition EH info and edges to
1425 the piecewise computations. */
1426 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1427 && gimple_purge_dead_eh_edges (bb))
1428 cfg_changed = true;
1432 return cfg_changed ? TODO_cleanup_cfg : 0;
1435 struct gimple_opt_pass pass_lower_vector =
1438 GIMPLE_PASS,
1439 "veclower", /* name */
1440 gate_expand_vector_operations_ssa, /* gate */
1441 expand_vector_operations, /* execute */
1442 NULL, /* sub */
1443 NULL, /* next */
1444 0, /* static_pass_number */
1445 TV_NONE, /* tv_id */
1446 PROP_cfg, /* properties_required */
1447 0, /* properties_provided */
1448 0, /* properties_destroyed */
1449 0, /* todo_flags_start */
1450 TODO_update_ssa /* todo_flags_finish */
1451 | TODO_verify_ssa
1452 | TODO_verify_stmts | TODO_verify_flow
1453 | TODO_cleanup_cfg
1457 struct gimple_opt_pass pass_lower_vector_ssa =
1460 GIMPLE_PASS,
1461 "veclower2", /* name */
1462 0, /* gate */
1463 expand_vector_operations, /* execute */
1464 NULL, /* sub */
1465 NULL, /* next */
1466 0, /* static_pass_number */
1467 TV_NONE, /* tv_id */
1468 PROP_cfg, /* properties_required */
1469 0, /* properties_provided */
1470 0, /* properties_destroyed */
1471 0, /* todo_flags_start */
1472 TODO_update_ssa /* todo_flags_finish */
1473 | TODO_verify_ssa
1474 | TODO_verify_stmts | TODO_verify_flow
1475 | TODO_cleanup_cfg
1479 #include "gt-tree-vect-generic.h"