Daily bump.
[official-gcc.git] / gcc / tree-vect-generic.c
blobf631c99cc0b2a5af244033162f86bb409575d494
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "stor-layout.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "basic-block.h"
28 #include "tree-ssa-alias.h"
29 #include "internal-fn.h"
30 #include "tree-eh.h"
31 #include "gimple-expr.h"
32 #include "is-a.h"
33 #include "gimple.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "gimple-ssa.h"
37 #include "tree-cfg.h"
38 #include "stringpool.h"
39 #include "tree-ssanames.h"
40 #include "tree-iterator.h"
41 #include "tree-pass.h"
42 #include "flags.h"
43 #include "diagnostic.h"
44 #include "target.h"
46 /* Need to include rtl.h, expr.h, etc. for optabs. */
47 #include "expr.h"
48 #include "optabs.h"
51 static void expand_vector_operations_1 (gimple_stmt_iterator *);
54 /* Build a constant of type TYPE, made of VALUE's bits replicated
55 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
56 static tree
57 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
59 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
60 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
61 / HOST_BITS_PER_WIDE_INT;
62 unsigned HOST_WIDE_INT low, mask;
63 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
64 int i;
66 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
68 if (width == HOST_BITS_PER_WIDE_INT)
69 low = value;
70 else
72 mask = ((HOST_WIDE_INT)1 << width) - 1;
73 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
76 for (i = 0; i < n; i++)
77 a[i] = low;
79 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
80 return wide_int_to_tree
81 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
84 static GTY(()) tree vector_inner_type;
85 static GTY(()) tree vector_last_type;
86 static GTY(()) int vector_last_nunits;
88 /* Return a suitable vector types made of SUBPARTS units each of mode
89 "word_mode" (the global variable). */
90 static tree
91 build_word_mode_vector_type (int nunits)
93 if (!vector_inner_type)
94 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
95 else if (vector_last_nunits == nunits)
97 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
98 return vector_last_type;
101 /* We build a new type, but we canonicalize it nevertheless,
102 because it still saves some memory. */
103 vector_last_nunits = nunits;
104 vector_last_type = type_hash_canon (nunits,
105 build_vector_type (vector_inner_type,
106 nunits));
107 return vector_last_type;
110 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
111 tree, tree, tree, tree, tree, enum tree_code);
113 static inline tree
114 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
115 tree t, tree bitsize, tree bitpos)
117 if (bitpos)
118 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
119 else
120 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
123 static tree
124 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
125 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
126 enum tree_code code)
128 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
129 return gimplify_build1 (gsi, code, inner_type, a);
132 static tree
133 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
134 tree bitpos, tree bitsize, enum tree_code code)
136 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
137 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
138 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
139 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
140 return gimplify_build2 (gsi, code, inner_type, a, b);
143 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
145 INNER_TYPE is the type of A and B elements
147 returned expression is of signed integer type with the
148 size equal to the size of INNER_TYPE. */
149 static tree
150 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
151 tree bitpos, tree bitsize, enum tree_code code)
153 tree comp_type;
155 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
156 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
158 comp_type = build_nonstandard_integer_type
159 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
161 return gimplify_build3 (gsi, COND_EXPR, comp_type,
162 fold_build2 (code, boolean_type_node, a, b),
163 build_int_cst (comp_type, -1),
164 build_int_cst (comp_type, 0));
167 /* Expand vector addition to scalars. This does bit twiddling
168 in order to increase parallelism:
170 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
171 (a ^ b) & 0x80808080
173 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
174 (a ^ ~b) & 0x80808080
176 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
178 This optimization should be done only if 4 vector items or more
179 fit into a word. */
180 static tree
181 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
182 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
183 enum tree_code code)
185 tree inner_type = TREE_TYPE (TREE_TYPE (a));
186 unsigned HOST_WIDE_INT max;
187 tree low_bits, high_bits, a_low, b_low, result_low, signs;
189 max = GET_MODE_MASK (TYPE_MODE (inner_type));
190 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
191 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
193 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
194 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
196 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
197 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
198 if (code == PLUS_EXPR)
199 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
200 else
202 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
203 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
206 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
207 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
208 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
211 static tree
212 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
213 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
214 tree bitsize ATTRIBUTE_UNUSED,
215 enum tree_code code ATTRIBUTE_UNUSED)
217 tree inner_type = TREE_TYPE (TREE_TYPE (b));
218 HOST_WIDE_INT max;
219 tree low_bits, high_bits, b_low, result_low, signs;
221 max = GET_MODE_MASK (TYPE_MODE (inner_type));
222 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
223 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
225 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
227 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
228 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
229 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
230 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
231 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
234 /* Expand a vector operation to scalars, by using many operations
235 whose type is the vector type's inner type. */
236 static tree
237 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
238 tree type, tree inner_type,
239 tree a, tree b, enum tree_code code)
241 vec<constructor_elt, va_gc> *v;
242 tree part_width = TYPE_SIZE (inner_type);
243 tree index = bitsize_int (0);
244 int nunits = TYPE_VECTOR_SUBPARTS (type);
245 int delta = tree_to_uhwi (part_width)
246 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
247 int i;
248 location_t loc = gimple_location (gsi_stmt (*gsi));
250 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
251 warning_at (loc, OPT_Wvector_operation_performance,
252 "vector operation will be expanded piecewise");
253 else
254 warning_at (loc, OPT_Wvector_operation_performance,
255 "vector operation will be expanded in parallel");
257 vec_alloc (v, (nunits + delta - 1) / delta);
258 for (i = 0; i < nunits;
259 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
261 tree result = f (gsi, inner_type, a, b, index, part_width, code);
262 constructor_elt ce = {NULL_TREE, result};
263 v->quick_push (ce);
266 return build_constructor (type, v);
269 /* Expand a vector operation to scalars with the freedom to use
270 a scalar integer type, or to use a different size for the items
271 in the vector type. */
272 static tree
273 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
274 tree a, tree b,
275 enum tree_code code)
277 tree result, compute_type;
278 enum machine_mode mode;
279 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
280 location_t loc = gimple_location (gsi_stmt (*gsi));
282 /* We have three strategies. If the type is already correct, just do
283 the operation an element at a time. Else, if the vector is wider than
284 one word, do it a word at a time; finally, if the vector is smaller
285 than one word, do it as a scalar. */
286 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
287 return expand_vector_piecewise (gsi, f,
288 type, TREE_TYPE (type),
289 a, b, code);
290 else if (n_words > 1)
292 tree word_type = build_word_mode_vector_type (n_words);
293 result = expand_vector_piecewise (gsi, f,
294 word_type, TREE_TYPE (word_type),
295 a, b, code);
296 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
297 GSI_SAME_STMT);
299 else
301 /* Use a single scalar operation with a mode no wider than word_mode. */
302 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
303 compute_type = lang_hooks.types.type_for_mode (mode, 1);
304 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
305 warning_at (loc, OPT_Wvector_operation_performance,
306 "vector operation will be expanded with a "
307 "single scalar operation");
310 return result;
313 /* Expand a vector operation to scalars; for integer types we can use
314 special bit twiddling tricks to do the sums a word at a time, using
315 function F_PARALLEL instead of F. These tricks are done only if
316 they can process at least four items, that is, only if the vector
317 holds at least four items and if a word can hold four items. */
318 static tree
319 expand_vector_addition (gimple_stmt_iterator *gsi,
320 elem_op_func f, elem_op_func f_parallel,
321 tree type, tree a, tree b, enum tree_code code)
323 int parts_per_word = UNITS_PER_WORD
324 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
326 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
327 && parts_per_word >= 4
328 && TYPE_VECTOR_SUBPARTS (type) >= 4)
329 return expand_vector_parallel (gsi, f_parallel,
330 type, a, b, code);
331 else
332 return expand_vector_piecewise (gsi, f,
333 type, TREE_TYPE (type),
334 a, b, code);
337 /* Try to expand vector comparison expression OP0 CODE OP1 by
338 querying optab if the following expression:
339 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
340 can be expanded. */
341 static tree
342 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
343 tree op1, enum tree_code code)
345 tree t;
346 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
347 t = expand_vector_piecewise (gsi, do_compare, type,
348 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
349 else
350 t = NULL_TREE;
352 return t;
355 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
356 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
357 the result if successful, otherwise return NULL_TREE. */
358 static tree
359 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
361 optab op;
362 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
363 bool scalar_shift = true;
365 for (i = 1; i < nunits; i++)
367 if (shiftcnts[i] != shiftcnts[0])
368 scalar_shift = false;
371 if (scalar_shift && shiftcnts[0] == 0)
372 return op0;
374 if (scalar_shift)
376 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
377 if (op != unknown_optab
378 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
379 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
380 build_int_cst (NULL_TREE, shiftcnts[0]));
383 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
384 if (op != unknown_optab
385 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
387 tree *vec = XALLOCAVEC (tree, nunits);
388 for (i = 0; i < nunits; i++)
389 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
390 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
391 build_vector (type, vec));
394 return NULL_TREE;
397 /* Try to expand integer vector division by constant using
398 widening multiply, shifts and additions. */
399 static tree
400 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
401 tree op1, enum tree_code code)
403 bool use_pow2 = true;
404 bool has_vector_shift = true;
405 int mode = -1, this_mode;
406 int pre_shift = -1, post_shift;
407 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
408 int *shifts = XALLOCAVEC (int, nunits * 4);
409 int *pre_shifts = shifts + nunits;
410 int *post_shifts = pre_shifts + nunits;
411 int *shift_temps = post_shifts + nunits;
412 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
413 int prec = TYPE_PRECISION (TREE_TYPE (type));
414 int dummy_int;
415 unsigned int i;
416 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
417 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
418 tree *vec;
419 tree cur_op, mulcst, tem;
420 optab op;
422 if (prec > HOST_BITS_PER_WIDE_INT)
423 return NULL_TREE;
425 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
426 if (op == unknown_optab
427 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
428 has_vector_shift = false;
430 /* Analysis phase. Determine if all op1 elements are either power
431 of two and it is possible to expand it using shifts (or for remainder
432 using masking). Additionally compute the multiplicative constants
433 and pre and post shifts if the division is to be expanded using
434 widening or high part multiplication plus shifts. */
435 for (i = 0; i < nunits; i++)
437 tree cst = VECTOR_CST_ELT (op1, i);
438 unsigned HOST_WIDE_INT ml;
440 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
441 return NULL_TREE;
442 pre_shifts[i] = 0;
443 post_shifts[i] = 0;
444 mulc[i] = 0;
445 if (use_pow2
446 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
447 use_pow2 = false;
448 if (use_pow2)
450 shifts[i] = tree_log2 (cst);
451 if (shifts[i] != shifts[0]
452 && code == TRUNC_DIV_EXPR
453 && !has_vector_shift)
454 use_pow2 = false;
456 if (mode == -2)
457 continue;
458 if (sign_p == UNSIGNED)
460 unsigned HOST_WIDE_INT mh;
461 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
463 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
464 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
465 return NULL_TREE;
467 if (d <= 1)
469 mode = -2;
470 continue;
473 /* Find a suitable multiplier and right shift count
474 instead of multiplying with D. */
475 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
477 /* If the suggested multiplier is more than SIZE bits, we can
478 do better for even divisors, using an initial right shift. */
479 if ((mh != 0 && (d & 1) == 0)
480 || (!has_vector_shift && pre_shift != -1))
482 if (has_vector_shift)
483 pre_shift = floor_log2 (d & -d);
484 else if (pre_shift == -1)
486 unsigned int j;
487 for (j = 0; j < nunits; j++)
489 tree cst2 = VECTOR_CST_ELT (op1, j);
490 unsigned HOST_WIDE_INT d2;
491 int this_pre_shift;
493 if (!tree_fits_uhwi_p (cst2))
494 return NULL_TREE;
495 d2 = tree_to_uhwi (cst2) & mask;
496 if (d2 == 0)
497 return NULL_TREE;
498 this_pre_shift = floor_log2 (d2 & -d2);
499 if (pre_shift == -1 || this_pre_shift < pre_shift)
500 pre_shift = this_pre_shift;
502 if (i != 0 && pre_shift != 0)
504 /* Restart. */
505 i = -1U;
506 mode = -1;
507 continue;
510 if (pre_shift != 0)
512 if ((d >> pre_shift) <= 1)
514 mode = -2;
515 continue;
517 mh = choose_multiplier (d >> pre_shift, prec,
518 prec - pre_shift,
519 &ml, &post_shift, &dummy_int);
520 gcc_assert (!mh);
521 pre_shifts[i] = pre_shift;
524 if (!mh)
525 this_mode = 0;
526 else
527 this_mode = 1;
529 else
531 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
532 unsigned HOST_WIDE_INT abs_d;
534 if (d == -1)
535 return NULL_TREE;
537 /* Since d might be INT_MIN, we have to cast to
538 unsigned HOST_WIDE_INT before negating to avoid
539 undefined signed overflow. */
540 abs_d = (d >= 0
541 ? (unsigned HOST_WIDE_INT) d
542 : - (unsigned HOST_WIDE_INT) d);
544 /* n rem d = n rem -d */
545 if (code == TRUNC_MOD_EXPR && d < 0)
546 d = abs_d;
547 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
549 /* This case is not handled correctly below. */
550 mode = -2;
551 continue;
553 if (abs_d <= 1)
555 mode = -2;
556 continue;
559 choose_multiplier (abs_d, prec, prec - 1, &ml,
560 &post_shift, &dummy_int);
561 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
563 this_mode = 4 + (d < 0);
564 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
566 else
567 this_mode = 2 + (d < 0);
569 mulc[i] = ml;
570 post_shifts[i] = post_shift;
571 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
572 || post_shift >= prec
573 || pre_shifts[i] >= prec)
574 this_mode = -2;
576 if (i == 0)
577 mode = this_mode;
578 else if (mode != this_mode)
579 mode = -2;
582 vec = XALLOCAVEC (tree, nunits);
584 if (use_pow2)
586 tree addend = NULL_TREE;
587 if (sign_p == SIGNED)
589 tree uns_type;
591 /* Both division and remainder sequences need
592 op0 < 0 ? mask : 0 computed. It can be either computed as
593 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
594 if none of the shifts is 0, or as the conditional. */
595 for (i = 0; i < nunits; i++)
596 if (shifts[i] == 0)
597 break;
598 uns_type
599 = build_vector_type (build_nonstandard_integer_type (prec, 1),
600 nunits);
601 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
603 for (i = 0; i < nunits; i++)
604 shift_temps[i] = prec - 1;
605 cur_op = add_rshift (gsi, type, op0, shift_temps);
606 if (cur_op != NULL_TREE)
608 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
609 uns_type, cur_op);
610 for (i = 0; i < nunits; i++)
611 shift_temps[i] = prec - shifts[i];
612 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
613 if (cur_op != NULL_TREE)
614 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
615 type, cur_op);
618 if (addend == NULL_TREE
619 && expand_vec_cond_expr_p (type, type))
621 tree zero, cst, cond;
622 gimple stmt;
624 zero = build_zero_cst (type);
625 cond = build2 (LT_EXPR, type, op0, zero);
626 for (i = 0; i < nunits; i++)
627 vec[i] = build_int_cst (TREE_TYPE (type),
628 ((unsigned HOST_WIDE_INT) 1
629 << shifts[i]) - 1);
630 cst = build_vector (type, vec);
631 addend = make_ssa_name (type, NULL);
632 stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
633 cond, cst, zero);
634 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
637 if (code == TRUNC_DIV_EXPR)
639 if (sign_p == UNSIGNED)
641 /* q = op0 >> shift; */
642 cur_op = add_rshift (gsi, type, op0, shifts);
643 if (cur_op != NULL_TREE)
644 return cur_op;
646 else if (addend != NULL_TREE)
648 /* t1 = op0 + addend;
649 q = t1 >> shift; */
650 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
651 if (op != unknown_optab
652 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
654 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
655 cur_op = add_rshift (gsi, type, cur_op, shifts);
656 if (cur_op != NULL_TREE)
657 return cur_op;
661 else
663 tree mask;
664 for (i = 0; i < nunits; i++)
665 vec[i] = build_int_cst (TREE_TYPE (type),
666 ((unsigned HOST_WIDE_INT) 1
667 << shifts[i]) - 1);
668 mask = build_vector (type, vec);
669 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
670 if (op != unknown_optab
671 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
673 if (sign_p == UNSIGNED)
674 /* r = op0 & mask; */
675 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
676 else if (addend != NULL_TREE)
678 /* t1 = op0 + addend;
679 t2 = t1 & mask;
680 r = t2 - addend; */
681 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
682 if (op != unknown_optab
683 && optab_handler (op, TYPE_MODE (type))
684 != CODE_FOR_nothing)
686 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
687 addend);
688 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
689 cur_op, mask);
690 op = optab_for_tree_code (MINUS_EXPR, type,
691 optab_default);
692 if (op != unknown_optab
693 && optab_handler (op, TYPE_MODE (type))
694 != CODE_FOR_nothing)
695 return gimplify_build2 (gsi, MINUS_EXPR, type,
696 cur_op, addend);
703 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
704 return NULL_TREE;
706 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
707 return NULL_TREE;
709 cur_op = op0;
711 switch (mode)
713 case 0:
714 gcc_assert (sign_p == UNSIGNED);
715 /* t1 = oprnd0 >> pre_shift;
716 t2 = t1 h* ml;
717 q = t2 >> post_shift; */
718 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
719 if (cur_op == NULL_TREE)
720 return NULL_TREE;
721 break;
722 case 1:
723 gcc_assert (sign_p == UNSIGNED);
724 for (i = 0; i < nunits; i++)
726 shift_temps[i] = 1;
727 post_shifts[i]--;
729 break;
730 case 2:
731 case 3:
732 case 4:
733 case 5:
734 gcc_assert (sign_p == SIGNED);
735 for (i = 0; i < nunits; i++)
736 shift_temps[i] = prec - 1;
737 break;
738 default:
739 return NULL_TREE;
742 for (i = 0; i < nunits; i++)
743 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
744 mulcst = build_vector (type, vec);
746 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
748 switch (mode)
750 case 0:
751 /* t1 = oprnd0 >> pre_shift;
752 t2 = t1 h* ml;
753 q = t2 >> post_shift; */
754 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
755 break;
756 case 1:
757 /* t1 = oprnd0 h* ml;
758 t2 = oprnd0 - t1;
759 t3 = t2 >> 1;
760 t4 = t1 + t3;
761 q = t4 >> (post_shift - 1); */
762 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
763 if (op == unknown_optab
764 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
765 return NULL_TREE;
766 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
767 tem = add_rshift (gsi, type, tem, shift_temps);
768 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
769 if (op == unknown_optab
770 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
771 return NULL_TREE;
772 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
773 cur_op = add_rshift (gsi, type, tem, post_shifts);
774 if (cur_op == NULL_TREE)
775 return NULL_TREE;
776 break;
777 case 2:
778 case 3:
779 case 4:
780 case 5:
781 /* t1 = oprnd0 h* ml;
782 t2 = t1; [ iff (mode & 2) != 0 ]
783 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
784 t3 = t2 >> post_shift;
785 t4 = oprnd0 >> (prec - 1);
786 q = t3 - t4; [ iff (mode & 1) == 0 ]
787 q = t4 - t3; [ iff (mode & 1) != 0 ] */
788 if ((mode & 2) == 0)
790 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
791 if (op == unknown_optab
792 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
793 return NULL_TREE;
794 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
796 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
797 if (cur_op == NULL_TREE)
798 return NULL_TREE;
799 tem = add_rshift (gsi, type, op0, shift_temps);
800 if (tem == NULL_TREE)
801 return NULL_TREE;
802 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
803 if (op == unknown_optab
804 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
805 return NULL_TREE;
806 if ((mode & 1) == 0)
807 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
808 else
809 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
810 break;
811 default:
812 gcc_unreachable ();
815 if (code == TRUNC_DIV_EXPR)
816 return cur_op;
818 /* We divided. Now finish by:
819 t1 = q * oprnd1;
820 r = oprnd0 - t1; */
821 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
822 if (op == unknown_optab
823 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
824 return NULL_TREE;
825 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
826 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
827 if (op == unknown_optab
828 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
829 return NULL_TREE;
830 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
833 /* Expand a vector condition to scalars, by using many conditions
834 on the vector's elements. */
835 static void
836 expand_vector_condition (gimple_stmt_iterator *gsi)
838 gimple stmt = gsi_stmt (*gsi);
839 tree type = gimple_expr_type (stmt);
840 tree a = gimple_assign_rhs1 (stmt);
841 tree a1 = a;
842 tree a2;
843 bool a_is_comparison = false;
844 tree b = gimple_assign_rhs2 (stmt);
845 tree c = gimple_assign_rhs3 (stmt);
846 vec<constructor_elt, va_gc> *v;
847 tree constr;
848 tree inner_type = TREE_TYPE (type);
849 tree cond_type = TREE_TYPE (TREE_TYPE (a));
850 tree comp_inner_type = cond_type;
851 tree width = TYPE_SIZE (inner_type);
852 tree index = bitsize_int (0);
853 int nunits = TYPE_VECTOR_SUBPARTS (type);
854 int i;
855 location_t loc = gimple_location (gsi_stmt (*gsi));
857 if (!is_gimple_val (a))
859 gcc_assert (COMPARISON_CLASS_P (a));
860 a_is_comparison = true;
861 a1 = TREE_OPERAND (a, 0);
862 a2 = TREE_OPERAND (a, 1);
863 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
866 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
867 return;
869 /* TODO: try and find a smaller vector type. */
871 warning_at (loc, OPT_Wvector_operation_performance,
872 "vector condition will be expanded piecewise");
874 vec_alloc (v, nunits);
875 for (i = 0; i < nunits;
876 i++, index = int_const_binop (PLUS_EXPR, index, width))
878 tree aa, result;
879 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
880 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
881 if (a_is_comparison)
883 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
884 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
885 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
887 else
888 aa = tree_vec_extract (gsi, cond_type, a, width, index);
889 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
890 constructor_elt ce = {NULL_TREE, result};
891 v->quick_push (ce);
894 constr = build_constructor (type, v);
895 gimple_assign_set_rhs_from_tree (gsi, constr);
896 update_stmt (gsi_stmt (*gsi));
899 static tree
900 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
901 gimple assign, enum tree_code code)
903 enum machine_mode compute_mode = TYPE_MODE (compute_type);
905 /* If the compute mode is not a vector mode (hence we are not decomposing
906 a BLKmode vector to smaller, hardware-supported vectors), we may want
907 to expand the operations in parallel. */
908 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
909 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
910 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
911 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
912 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
913 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
914 switch (code)
916 case PLUS_EXPR:
917 case MINUS_EXPR:
918 if (!TYPE_OVERFLOW_TRAPS (type))
919 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
920 gimple_assign_rhs1 (assign),
921 gimple_assign_rhs2 (assign), code);
922 break;
924 case NEGATE_EXPR:
925 if (!TYPE_OVERFLOW_TRAPS (type))
926 return expand_vector_addition (gsi, do_unop, do_negate, type,
927 gimple_assign_rhs1 (assign),
928 NULL_TREE, code);
929 break;
931 case BIT_AND_EXPR:
932 case BIT_IOR_EXPR:
933 case BIT_XOR_EXPR:
934 return expand_vector_parallel (gsi, do_binop, type,
935 gimple_assign_rhs1 (assign),
936 gimple_assign_rhs2 (assign), code);
938 case BIT_NOT_EXPR:
939 return expand_vector_parallel (gsi, do_unop, type,
940 gimple_assign_rhs1 (assign),
941 NULL_TREE, code);
942 case EQ_EXPR:
943 case NE_EXPR:
944 case GT_EXPR:
945 case LT_EXPR:
946 case GE_EXPR:
947 case LE_EXPR:
948 case UNEQ_EXPR:
949 case UNGT_EXPR:
950 case UNLT_EXPR:
951 case UNGE_EXPR:
952 case UNLE_EXPR:
953 case LTGT_EXPR:
954 case ORDERED_EXPR:
955 case UNORDERED_EXPR:
957 tree rhs1 = gimple_assign_rhs1 (assign);
958 tree rhs2 = gimple_assign_rhs2 (assign);
960 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
963 case TRUNC_DIV_EXPR:
964 case TRUNC_MOD_EXPR:
966 tree rhs1 = gimple_assign_rhs1 (assign);
967 tree rhs2 = gimple_assign_rhs2 (assign);
968 tree ret;
970 if (!optimize
971 || !VECTOR_INTEGER_TYPE_P (type)
972 || TREE_CODE (rhs2) != VECTOR_CST
973 || !VECTOR_MODE_P (TYPE_MODE (type)))
974 break;
976 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
977 if (ret != NULL_TREE)
978 return ret;
979 break;
982 default:
983 break;
986 if (TREE_CODE_CLASS (code) == tcc_unary)
987 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
988 gimple_assign_rhs1 (assign),
989 NULL_TREE, code);
990 else
991 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
992 gimple_assign_rhs1 (assign),
993 gimple_assign_rhs2 (assign), code);
996 /* Try to optimize
997 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
998 style stmts into:
999 _9 = { b_7, b_7, b_7, b_7 };
1000 a_5 = _9 + { 0, 3, 6, 9 };
1001 because vector splat operation is usually more efficient
1002 than piecewise initialization of the vector. */
1004 static void
1005 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1007 gimple stmt = gsi_stmt (*gsi);
1008 tree lhs = gimple_assign_lhs (stmt);
1009 tree rhs = gimple_assign_rhs1 (stmt);
1010 tree type = TREE_TYPE (rhs);
1011 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1012 bool all_same = true;
1013 constructor_elt *elt;
1014 tree *cst;
1015 gimple g;
1016 tree base = NULL_TREE;
1017 optab op;
1019 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1020 return;
1021 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1022 if (op == unknown_optab
1023 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1024 return;
1025 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1026 if (TREE_CODE (elt->value) != SSA_NAME
1027 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1028 return;
1029 else
1031 tree this_base = elt->value;
1032 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1033 all_same = false;
1034 for (j = 0; j < nelts + 1; j++)
1036 g = SSA_NAME_DEF_STMT (this_base);
1037 if (is_gimple_assign (g)
1038 && gimple_assign_rhs_code (g) == PLUS_EXPR
1039 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1040 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1041 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1042 this_base = gimple_assign_rhs1 (g);
1043 else
1044 break;
1046 if (i == 0)
1047 base = this_base;
1048 else if (this_base != base)
1049 return;
1051 if (all_same)
1052 return;
1053 cst = XALLOCAVEC (tree, nelts);
1054 for (i = 0; i < nelts; i++)
1056 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1057 cst[i] = build_zero_cst (TREE_TYPE (base));
1058 while (this_base != base)
1060 g = SSA_NAME_DEF_STMT (this_base);
1061 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1062 cst[i], gimple_assign_rhs2 (g));
1063 if (cst[i] == NULL_TREE
1064 || TREE_CODE (cst[i]) != INTEGER_CST
1065 || TREE_OVERFLOW (cst[i]))
1066 return;
1067 this_base = gimple_assign_rhs1 (g);
1070 for (i = 0; i < nelts; i++)
1071 CONSTRUCTOR_ELT (rhs, i)->value = base;
1072 g = gimple_build_assign (make_ssa_name (type, NULL), rhs);
1073 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1074 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, gimple_assign_lhs (g),
1075 build_vector (type, cst));
1076 gsi_replace (gsi, g, false);
1079 /* Return a type for the widest vector mode whose components are of type
1080 TYPE, or NULL_TREE if none is found. */
1082 static tree
1083 type_for_widest_vector_mode (tree type, optab op)
1085 enum machine_mode inner_mode = TYPE_MODE (type);
1086 enum machine_mode best_mode = VOIDmode, mode;
1087 int best_nunits = 0;
1089 if (SCALAR_FLOAT_MODE_P (inner_mode))
1090 mode = MIN_MODE_VECTOR_FLOAT;
1091 else if (SCALAR_FRACT_MODE_P (inner_mode))
1092 mode = MIN_MODE_VECTOR_FRACT;
1093 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1094 mode = MIN_MODE_VECTOR_UFRACT;
1095 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1096 mode = MIN_MODE_VECTOR_ACCUM;
1097 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1098 mode = MIN_MODE_VECTOR_UACCUM;
1099 else
1100 mode = MIN_MODE_VECTOR_INT;
1102 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1103 if (GET_MODE_INNER (mode) == inner_mode
1104 && GET_MODE_NUNITS (mode) > best_nunits
1105 && optab_handler (op, mode) != CODE_FOR_nothing)
1106 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1108 if (best_mode == VOIDmode)
1109 return NULL_TREE;
1110 else
1111 return build_vector_type_for_mode (type, best_mode);
1115 /* Build a reference to the element of the vector VECT. Function
1116 returns either the element itself, either BIT_FIELD_REF, or an
1117 ARRAY_REF expression.
1119 GSI is required to insert temporary variables while building a
1120 refernece to the element of the vector VECT.
1122 PTMPVEC is a pointer to the temporary variable for caching
1123 purposes. In case when PTMPVEC is NULL new temporary variable
1124 will be created. */
1125 static tree
1126 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1128 tree vect_type, vect_elt_type;
1129 gimple asgn;
1130 tree tmpvec;
1131 tree arraytype;
1132 bool need_asgn = true;
1133 unsigned int elements;
1135 vect_type = TREE_TYPE (vect);
1136 vect_elt_type = TREE_TYPE (vect_type);
1137 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1139 if (TREE_CODE (idx) == INTEGER_CST)
1141 unsigned HOST_WIDE_INT index;
1143 /* Given that we're about to compute a binary modulus,
1144 we don't care about the high bits of the value. */
1145 index = TREE_INT_CST_LOW (idx);
1146 if (!tree_fits_uhwi_p (idx) || index >= elements)
1148 index &= elements - 1;
1149 idx = build_int_cst (TREE_TYPE (idx), index);
1152 /* When lowering a vector statement sequence do some easy
1153 simplification by looking through intermediate vector results. */
1154 if (TREE_CODE (vect) == SSA_NAME)
1156 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1157 if (is_gimple_assign (def_stmt)
1158 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1159 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1160 vect = gimple_assign_rhs1 (def_stmt);
1163 if (TREE_CODE (vect) == VECTOR_CST)
1164 return VECTOR_CST_ELT (vect, index);
1165 else if (TREE_CODE (vect) == CONSTRUCTOR
1166 && (CONSTRUCTOR_NELTS (vect) == 0
1167 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1168 != VECTOR_TYPE))
1170 if (index < CONSTRUCTOR_NELTS (vect))
1171 return CONSTRUCTOR_ELT (vect, index)->value;
1172 return build_zero_cst (vect_elt_type);
1174 else
1176 tree size = TYPE_SIZE (vect_elt_type);
1177 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1178 size);
1179 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1183 if (!ptmpvec)
1184 tmpvec = create_tmp_var (vect_type, "vectmp");
1185 else if (!*ptmpvec)
1186 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1187 else
1189 tmpvec = *ptmpvec;
1190 need_asgn = false;
1193 if (need_asgn)
1195 TREE_ADDRESSABLE (tmpvec) = 1;
1196 asgn = gimple_build_assign (tmpvec, vect);
1197 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1200 arraytype = build_array_type_nelts (vect_elt_type, elements);
1201 return build4 (ARRAY_REF, vect_elt_type,
1202 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1203 idx, NULL_TREE, NULL_TREE);
1206 /* Check if VEC_PERM_EXPR within the given setting is supported
1207 by hardware, or lower it piecewise.
1209 When VEC_PERM_EXPR has the same first and second operands:
1210 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1211 {v0[mask[0]], v0[mask[1]], ...}
1212 MASK and V0 must have the same number of elements.
1214 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1215 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1216 V0 and V1 must have the same type. MASK, V0, V1 must have the
1217 same number of arguments. */
1219 static void
1220 lower_vec_perm (gimple_stmt_iterator *gsi)
1222 gimple stmt = gsi_stmt (*gsi);
1223 tree mask = gimple_assign_rhs3 (stmt);
1224 tree vec0 = gimple_assign_rhs1 (stmt);
1225 tree vec1 = gimple_assign_rhs2 (stmt);
1226 tree vect_type = TREE_TYPE (vec0);
1227 tree mask_type = TREE_TYPE (mask);
1228 tree vect_elt_type = TREE_TYPE (vect_type);
1229 tree mask_elt_type = TREE_TYPE (mask_type);
1230 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1231 vec<constructor_elt, va_gc> *v;
1232 tree constr, t, si, i_val;
1233 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1234 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1235 location_t loc = gimple_location (gsi_stmt (*gsi));
1236 unsigned i;
1238 if (TREE_CODE (mask) == SSA_NAME)
1240 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1241 if (is_gimple_assign (def_stmt)
1242 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1243 mask = gimple_assign_rhs1 (def_stmt);
1246 if (TREE_CODE (mask) == VECTOR_CST)
1248 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1250 for (i = 0; i < elements; ++i)
1251 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1252 & (2 * elements - 1));
1254 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1256 gimple_assign_set_rhs3 (stmt, mask);
1257 update_stmt (stmt);
1258 return;
1261 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1262 return;
1264 warning_at (loc, OPT_Wvector_operation_performance,
1265 "vector shuffling operation will be expanded piecewise");
1267 vec_alloc (v, elements);
1268 for (i = 0; i < elements; i++)
1270 si = size_int (i);
1271 i_val = vector_element (gsi, mask, si, &masktmp);
1273 if (TREE_CODE (i_val) == INTEGER_CST)
1275 unsigned HOST_WIDE_INT index;
1277 index = TREE_INT_CST_LOW (i_val);
1278 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1279 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1281 if (two_operand_p && (index & elements) != 0)
1282 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1283 else
1284 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1286 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1287 true, GSI_SAME_STMT);
1289 else
1291 tree cond = NULL_TREE, v0_val;
1293 if (two_operand_p)
1295 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1296 build_int_cst (mask_elt_type, elements));
1297 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1298 true, GSI_SAME_STMT);
1301 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1302 build_int_cst (mask_elt_type, elements - 1));
1303 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1304 true, GSI_SAME_STMT);
1306 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1307 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1308 true, GSI_SAME_STMT);
1310 if (two_operand_p)
1312 tree v1_val;
1314 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1315 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1316 true, GSI_SAME_STMT);
1318 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1319 cond, build_zero_cst (mask_elt_type));
1320 cond = fold_build3 (COND_EXPR, vect_elt_type,
1321 cond, v0_val, v1_val);
1322 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1323 true, GSI_SAME_STMT);
1325 else
1326 t = v0_val;
1329 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1332 constr = build_constructor (vect_type, v);
1333 gimple_assign_set_rhs_from_tree (gsi, constr);
1334 update_stmt (gsi_stmt (*gsi));
1337 /* Return type in which CODE operation with optab OP can be
1338 computed. */
1340 static tree
1341 get_compute_type (enum tree_code code, optab op, tree type)
1343 /* For very wide vectors, try using a smaller vector mode. */
1344 tree compute_type = type;
1345 if (op
1346 && (!VECTOR_MODE_P (TYPE_MODE (type))
1347 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1349 tree vector_compute_type
1350 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1351 if (vector_compute_type != NULL_TREE
1352 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1353 < TYPE_VECTOR_SUBPARTS (compute_type))
1354 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1355 != CODE_FOR_nothing))
1356 compute_type = vector_compute_type;
1359 /* If we are breaking a BLKmode vector into smaller pieces,
1360 type_for_widest_vector_mode has already looked into the optab,
1361 so skip these checks. */
1362 if (compute_type == type)
1364 enum machine_mode compute_mode = TYPE_MODE (compute_type);
1365 if (VECTOR_MODE_P (compute_mode))
1367 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1368 return compute_type;
1369 if (code == MULT_HIGHPART_EXPR
1370 && can_mult_highpart_p (compute_mode,
1371 TYPE_UNSIGNED (compute_type)))
1372 return compute_type;
1374 /* There is no operation in hardware, so fall back to scalars. */
1375 compute_type = TREE_TYPE (type);
1378 return compute_type;
1381 /* Helper function of expand_vector_operations_1. Return number of
1382 vector elements for vector types or 1 for other types. */
1384 static inline int
1385 count_type_subparts (tree type)
1387 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1390 /* Process one statement. If we identify a vector operation, expand it. */
1392 static void
1393 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1395 gimple stmt = gsi_stmt (*gsi);
1396 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1397 enum tree_code code;
1398 optab op = unknown_optab;
1399 enum gimple_rhs_class rhs_class;
1400 tree new_rhs;
1402 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1403 return;
1405 code = gimple_assign_rhs_code (stmt);
1406 rhs_class = get_gimple_rhs_class (code);
1407 lhs = gimple_assign_lhs (stmt);
1409 if (code == VEC_PERM_EXPR)
1411 lower_vec_perm (gsi);
1412 return;
1415 if (code == VEC_COND_EXPR)
1417 expand_vector_condition (gsi);
1418 return;
1421 if (code == CONSTRUCTOR
1422 && TREE_CODE (lhs) == SSA_NAME
1423 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1424 && !gimple_clobber_p (stmt)
1425 && optimize)
1427 optimize_vector_constructor (gsi);
1428 return;
1431 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1432 return;
1434 rhs1 = gimple_assign_rhs1 (stmt);
1435 type = gimple_expr_type (stmt);
1436 if (rhs_class == GIMPLE_BINARY_RHS)
1437 rhs2 = gimple_assign_rhs2 (stmt);
1439 if (TREE_CODE (type) != VECTOR_TYPE)
1440 return;
1442 if (code == NOP_EXPR
1443 || code == FLOAT_EXPR
1444 || code == FIX_TRUNC_EXPR
1445 || code == VIEW_CONVERT_EXPR)
1446 return;
1448 gcc_assert (code != CONVERT_EXPR);
1450 /* The signedness is determined from input argument. */
1451 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1452 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1453 type = TREE_TYPE (rhs1);
1455 /* For widening/narrowing vector operations, the relevant type is of the
1456 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1457 calculated in the same way above. */
1458 if (code == WIDEN_SUM_EXPR
1459 || code == VEC_WIDEN_MULT_HI_EXPR
1460 || code == VEC_WIDEN_MULT_LO_EXPR
1461 || code == VEC_WIDEN_MULT_EVEN_EXPR
1462 || code == VEC_WIDEN_MULT_ODD_EXPR
1463 || code == VEC_UNPACK_HI_EXPR
1464 || code == VEC_UNPACK_LO_EXPR
1465 || code == VEC_PACK_TRUNC_EXPR
1466 || code == VEC_PACK_SAT_EXPR
1467 || code == VEC_PACK_FIX_TRUNC_EXPR
1468 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1469 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1470 type = TREE_TYPE (rhs1);
1472 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1473 scalar */
1474 if (code == LSHIFT_EXPR
1475 || code == RSHIFT_EXPR
1476 || code == LROTATE_EXPR
1477 || code == RROTATE_EXPR)
1479 optab opv;
1481 /* Check whether we have vector <op> {x,x,x,x} where x
1482 could be a scalar variable or a constant. Transform
1483 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1484 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1486 tree first;
1487 gimple def_stmt;
1489 if ((TREE_CODE (rhs2) == VECTOR_CST
1490 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1491 || (TREE_CODE (rhs2) == SSA_NAME
1492 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1493 && gimple_assign_single_p (def_stmt)
1494 && (first = uniform_vector_p
1495 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1497 gimple_assign_set_rhs2 (stmt, first);
1498 update_stmt (stmt);
1499 rhs2 = first;
1503 opv = optab_for_tree_code (code, type, optab_vector);
1504 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1505 op = opv;
1506 else
1508 op = optab_for_tree_code (code, type, optab_scalar);
1510 compute_type = get_compute_type (code, op, type);
1511 if (compute_type == type)
1512 return;
1513 /* The rtl expander will expand vector/scalar as vector/vector
1514 if necessary. Pick one with wider vector type. */
1515 tree compute_vtype = get_compute_type (code, opv, type);
1516 if (count_type_subparts (compute_vtype)
1517 > count_type_subparts (compute_type))
1519 compute_type = compute_vtype;
1520 op = opv;
1524 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1526 if (compute_type == NULL_TREE)
1527 compute_type = get_compute_type (code, op, type);
1528 if (compute_type == type)
1529 return;
1530 /* Before splitting vector rotates into scalar rotates,
1531 see if we can't use vector shifts and BIT_IOR_EXPR
1532 instead. For vector by vector rotates we'd also
1533 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1534 for now, fold doesn't seem to create such rotates anyway. */
1535 if (compute_type == TREE_TYPE (type)
1536 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1538 optab oplv = vashl_optab, opl = ashl_optab;
1539 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1540 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1541 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1542 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1543 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1544 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1545 /* The rtl expander will expand vector/scalar as vector/vector
1546 if necessary. Pick one with wider vector type. */
1547 if (count_type_subparts (compute_lvtype)
1548 > count_type_subparts (compute_ltype))
1550 compute_ltype = compute_lvtype;
1551 opl = oplv;
1553 if (count_type_subparts (compute_rvtype)
1554 > count_type_subparts (compute_rtype))
1556 compute_rtype = compute_rvtype;
1557 opr = oprv;
1559 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1560 BIT_IOR_EXPR. */
1561 compute_type = compute_ltype;
1562 if (count_type_subparts (compute_type)
1563 > count_type_subparts (compute_rtype))
1564 compute_type = compute_rtype;
1565 if (count_type_subparts (compute_type)
1566 > count_type_subparts (compute_otype))
1567 compute_type = compute_otype;
1568 /* Verify all 3 operations can be performed in that type. */
1569 if (compute_type != TREE_TYPE (type))
1571 if (optab_handler (opl, TYPE_MODE (compute_type))
1572 == CODE_FOR_nothing
1573 || optab_handler (opr, TYPE_MODE (compute_type))
1574 == CODE_FOR_nothing
1575 || optab_handler (opo, TYPE_MODE (compute_type))
1576 == CODE_FOR_nothing)
1577 compute_type = TREE_TYPE (type);
1582 else
1583 op = optab_for_tree_code (code, type, optab_default);
1585 /* Optabs will try converting a negation into a subtraction, so
1586 look for it as well. TODO: negation of floating-point vectors
1587 might be turned into an exclusive OR toggling the sign bit. */
1588 if (op == unknown_optab
1589 && code == NEGATE_EXPR
1590 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1591 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1593 if (compute_type == NULL_TREE)
1594 compute_type = get_compute_type (code, op, type);
1595 if (compute_type == type)
1596 return;
1598 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
1599 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1601 /* Leave expression untouched for later expansion. */
1602 if (new_rhs == NULL_TREE)
1603 return;
1605 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1606 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1607 new_rhs);
1609 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1610 way to do it is change expand_vector_operation and its callees to
1611 return a tree_code, RHS1 and RHS2 instead of a tree. */
1612 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1613 update_stmt (gsi_stmt (*gsi));
1616 /* Use this to lower vector operations introduced by the vectorizer,
1617 if it may need the bit-twiddling tricks implemented in this file. */
1619 static unsigned int
1620 expand_vector_operations (void)
1622 gimple_stmt_iterator gsi;
1623 basic_block bb;
1624 bool cfg_changed = false;
1626 FOR_EACH_BB_FN (bb, cfun)
1628 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1630 expand_vector_operations_1 (&gsi);
1631 /* ??? If we do not cleanup EH then we will ICE in
1632 verification. But in reality we have created wrong-code
1633 as we did not properly transition EH info and edges to
1634 the piecewise computations. */
1635 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1636 && gimple_purge_dead_eh_edges (bb))
1637 cfg_changed = true;
1641 return cfg_changed ? TODO_cleanup_cfg : 0;
1644 namespace {
1646 const pass_data pass_data_lower_vector =
1648 GIMPLE_PASS, /* type */
1649 "veclower", /* name */
1650 OPTGROUP_VEC, /* optinfo_flags */
1651 TV_NONE, /* tv_id */
1652 PROP_cfg, /* properties_required */
1653 PROP_gimple_lvec, /* properties_provided */
1654 0, /* properties_destroyed */
1655 0, /* todo_flags_start */
1656 ( TODO_update_ssa
1657 | TODO_cleanup_cfg ), /* todo_flags_finish */
1660 class pass_lower_vector : public gimple_opt_pass
1662 public:
1663 pass_lower_vector (gcc::context *ctxt)
1664 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1667 /* opt_pass methods: */
1668 virtual bool gate (function *fun)
1670 return !(fun->curr_properties & PROP_gimple_lvec);
1673 virtual unsigned int execute (function *)
1675 return expand_vector_operations ();
1678 }; // class pass_lower_vector
1680 } // anon namespace
1682 gimple_opt_pass *
1683 make_pass_lower_vector (gcc::context *ctxt)
1685 return new pass_lower_vector (ctxt);
1688 namespace {
1690 const pass_data pass_data_lower_vector_ssa =
1692 GIMPLE_PASS, /* type */
1693 "veclower2", /* name */
1694 OPTGROUP_VEC, /* optinfo_flags */
1695 TV_NONE, /* tv_id */
1696 PROP_cfg, /* properties_required */
1697 PROP_gimple_lvec, /* properties_provided */
1698 0, /* properties_destroyed */
1699 0, /* todo_flags_start */
1700 ( TODO_update_ssa
1701 | TODO_cleanup_cfg ), /* todo_flags_finish */
1704 class pass_lower_vector_ssa : public gimple_opt_pass
1706 public:
1707 pass_lower_vector_ssa (gcc::context *ctxt)
1708 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1711 /* opt_pass methods: */
1712 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1713 virtual unsigned int execute (function *)
1715 return expand_vector_operations ();
1718 }; // class pass_lower_vector_ssa
1720 } // anon namespace
1722 gimple_opt_pass *
1723 make_pass_lower_vector_ssa (gcc::context *ctxt)
1725 return new pass_lower_vector_ssa (ctxt);
1728 #include "gt-tree-vect-generic.h"