* omp-low.c (lower_omp_target): Remove unreachable code & merge
[official-gcc.git] / gcc / tree-vect-generic.c
blobc357d2bdc132a0f396f167d091ff73ad27654a95
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "expmed.h"
31 #include "optabs-tree.h"
32 #include "diagnostic.h"
33 #include "alias.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "langhooks.h"
37 #include "internal-fn.h"
38 #include "tree-eh.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-cfg.h"
42 #include "tree-iterator.h"
43 #include "flags.h"
46 static void expand_vector_operations_1 (gimple_stmt_iterator *);
49 /* Build a constant of type TYPE, made of VALUE's bits replicated
50 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
51 static tree
52 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
54 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
55 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
56 / HOST_BITS_PER_WIDE_INT;
57 unsigned HOST_WIDE_INT low, mask;
58 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
59 int i;
61 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
63 if (width == HOST_BITS_PER_WIDE_INT)
64 low = value;
65 else
67 mask = ((HOST_WIDE_INT)1 << width) - 1;
68 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
71 for (i = 0; i < n; i++)
72 a[i] = low;
74 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
75 return wide_int_to_tree
76 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
79 static GTY(()) tree vector_inner_type;
80 static GTY(()) tree vector_last_type;
81 static GTY(()) int vector_last_nunits;
83 /* Return a suitable vector types made of SUBPARTS units each of mode
84 "word_mode" (the global variable). */
85 static tree
86 build_word_mode_vector_type (int nunits)
88 if (!vector_inner_type)
89 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
90 else if (vector_last_nunits == nunits)
92 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
93 return vector_last_type;
96 /* We build a new type, but we canonicalize it nevertheless,
97 because it still saves some memory. */
98 vector_last_nunits = nunits;
99 vector_last_type = type_hash_canon (nunits,
100 build_vector_type (vector_inner_type,
101 nunits));
102 return vector_last_type;
105 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
106 tree, tree, tree, tree, tree, enum tree_code,
107 tree);
109 static inline tree
110 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
111 tree t, tree bitsize, tree bitpos)
113 if (bitpos)
115 if (TREE_CODE (type) == BOOLEAN_TYPE)
117 tree itype
118 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
119 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
120 bitsize, bitpos);
121 return gimplify_build2 (gsi, NE_EXPR, type, field,
122 build_zero_cst (itype));
124 else
125 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
127 else
128 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
131 static tree
132 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
133 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
134 enum tree_code code, tree type ATTRIBUTE_UNUSED)
136 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
137 return gimplify_build1 (gsi, code, inner_type, a);
140 static tree
141 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
142 tree bitpos, tree bitsize, enum tree_code code,
143 tree type ATTRIBUTE_UNUSED)
145 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
146 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
147 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
148 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
149 return gimplify_build2 (gsi, code, inner_type, a, b);
152 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
154 INNER_TYPE is the type of A and B elements
156 returned expression is of signed integer type with the
157 size equal to the size of INNER_TYPE. */
158 static tree
159 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
160 tree bitpos, tree bitsize, enum tree_code code, tree type)
162 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
163 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
165 return gimplify_build2 (gsi, code, TREE_TYPE (type), a, b);
168 /* Expand vector addition to scalars. This does bit twiddling
169 in order to increase parallelism:
171 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
172 (a ^ b) & 0x80808080
174 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
175 (a ^ ~b) & 0x80808080
177 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
179 This optimization should be done only if 4 vector items or more
180 fit into a word. */
181 static tree
182 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
183 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
184 enum tree_code code, tree type ATTRIBUTE_UNUSED)
186 tree inner_type = TREE_TYPE (TREE_TYPE (a));
187 unsigned HOST_WIDE_INT max;
188 tree low_bits, high_bits, a_low, b_low, result_low, signs;
190 max = GET_MODE_MASK (TYPE_MODE (inner_type));
191 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
192 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
194 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
195 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
197 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
198 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
199 if (code == PLUS_EXPR)
200 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
201 else
203 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
204 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
207 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
208 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
209 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
212 static tree
213 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
214 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
215 tree bitsize ATTRIBUTE_UNUSED,
216 enum tree_code code ATTRIBUTE_UNUSED,
217 tree type ATTRIBUTE_UNUSED)
219 tree inner_type = TREE_TYPE (TREE_TYPE (b));
220 HOST_WIDE_INT max;
221 tree low_bits, high_bits, b_low, result_low, signs;
223 max = GET_MODE_MASK (TYPE_MODE (inner_type));
224 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
225 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
227 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
229 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
230 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
231 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
232 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
233 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
236 /* Expand a vector operation to scalars, by using many operations
237 whose type is the vector type's inner type. */
238 static tree
239 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
240 tree type, tree inner_type,
241 tree a, tree b, enum tree_code code)
243 vec<constructor_elt, va_gc> *v;
244 tree part_width = TYPE_SIZE (inner_type);
245 tree index = bitsize_int (0);
246 int nunits = TYPE_VECTOR_SUBPARTS (type);
247 int delta = tree_to_uhwi (part_width)
248 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
249 int i;
250 location_t loc = gimple_location (gsi_stmt (*gsi));
252 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
253 warning_at (loc, OPT_Wvector_operation_performance,
254 "vector operation will be expanded piecewise");
255 else
256 warning_at (loc, OPT_Wvector_operation_performance,
257 "vector operation will be expanded in parallel");
259 vec_alloc (v, (nunits + delta - 1) / delta);
260 for (i = 0; i < nunits;
261 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
263 tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
264 constructor_elt ce = {NULL_TREE, result};
265 v->quick_push (ce);
268 return build_constructor (type, v);
271 /* Expand a vector operation to scalars with the freedom to use
272 a scalar integer type, or to use a different size for the items
273 in the vector type. */
274 static tree
275 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
276 tree a, tree b,
277 enum tree_code code)
279 tree result, compute_type;
280 machine_mode mode;
281 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
282 location_t loc = gimple_location (gsi_stmt (*gsi));
284 /* We have three strategies. If the type is already correct, just do
285 the operation an element at a time. Else, if the vector is wider than
286 one word, do it a word at a time; finally, if the vector is smaller
287 than one word, do it as a scalar. */
288 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
289 return expand_vector_piecewise (gsi, f,
290 type, TREE_TYPE (type),
291 a, b, code);
292 else if (n_words > 1)
294 tree word_type = build_word_mode_vector_type (n_words);
295 result = expand_vector_piecewise (gsi, f,
296 word_type, TREE_TYPE (word_type),
297 a, b, code);
298 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
299 GSI_SAME_STMT);
301 else
303 /* Use a single scalar operation with a mode no wider than word_mode. */
304 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
305 compute_type = lang_hooks.types.type_for_mode (mode, 1);
306 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
307 warning_at (loc, OPT_Wvector_operation_performance,
308 "vector operation will be expanded with a "
309 "single scalar operation");
312 return result;
315 /* Expand a vector operation to scalars; for integer types we can use
316 special bit twiddling tricks to do the sums a word at a time, using
317 function F_PARALLEL instead of F. These tricks are done only if
318 they can process at least four items, that is, only if the vector
319 holds at least four items and if a word can hold four items. */
320 static tree
321 expand_vector_addition (gimple_stmt_iterator *gsi,
322 elem_op_func f, elem_op_func f_parallel,
323 tree type, tree a, tree b, enum tree_code code)
325 int parts_per_word = UNITS_PER_WORD
326 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
328 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
329 && parts_per_word >= 4
330 && TYPE_VECTOR_SUBPARTS (type) >= 4)
331 return expand_vector_parallel (gsi, f_parallel,
332 type, a, b, code);
333 else
334 return expand_vector_piecewise (gsi, f,
335 type, TREE_TYPE (type),
336 a, b, code);
339 /* Try to expand vector comparison expression OP0 CODE OP1 by
340 querying optab if the following expression:
341 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
342 can be expanded. */
343 static tree
344 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
345 tree op1, enum tree_code code)
347 tree t;
348 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
349 t = expand_vector_piecewise (gsi, do_compare, type,
350 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
351 else
352 t = NULL_TREE;
354 return t;
357 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
358 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
359 the result if successful, otherwise return NULL_TREE. */
360 static tree
361 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
363 optab op;
364 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
365 bool scalar_shift = true;
367 for (i = 1; i < nunits; i++)
369 if (shiftcnts[i] != shiftcnts[0])
370 scalar_shift = false;
373 if (scalar_shift && shiftcnts[0] == 0)
374 return op0;
376 if (scalar_shift)
378 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
379 if (op != unknown_optab
380 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
381 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
382 build_int_cst (NULL_TREE, shiftcnts[0]));
385 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
386 if (op != unknown_optab
387 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
389 tree *vec = XALLOCAVEC (tree, nunits);
390 for (i = 0; i < nunits; i++)
391 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
392 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
393 build_vector (type, vec));
396 return NULL_TREE;
399 /* Try to expand integer vector division by constant using
400 widening multiply, shifts and additions. */
401 static tree
402 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
403 tree op1, enum tree_code code)
405 bool use_pow2 = true;
406 bool has_vector_shift = true;
407 int mode = -1, this_mode;
408 int pre_shift = -1, post_shift;
409 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
410 int *shifts = XALLOCAVEC (int, nunits * 4);
411 int *pre_shifts = shifts + nunits;
412 int *post_shifts = pre_shifts + nunits;
413 int *shift_temps = post_shifts + nunits;
414 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
415 int prec = TYPE_PRECISION (TREE_TYPE (type));
416 int dummy_int;
417 unsigned int i;
418 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
419 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
420 tree *vec;
421 tree cur_op, mulcst, tem;
422 optab op;
424 if (prec > HOST_BITS_PER_WIDE_INT)
425 return NULL_TREE;
427 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
428 if (op == unknown_optab
429 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
430 has_vector_shift = false;
432 /* Analysis phase. Determine if all op1 elements are either power
433 of two and it is possible to expand it using shifts (or for remainder
434 using masking). Additionally compute the multiplicative constants
435 and pre and post shifts if the division is to be expanded using
436 widening or high part multiplication plus shifts. */
437 for (i = 0; i < nunits; i++)
439 tree cst = VECTOR_CST_ELT (op1, i);
440 unsigned HOST_WIDE_INT ml;
442 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
443 return NULL_TREE;
444 pre_shifts[i] = 0;
445 post_shifts[i] = 0;
446 mulc[i] = 0;
447 if (use_pow2
448 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
449 use_pow2 = false;
450 if (use_pow2)
452 shifts[i] = tree_log2 (cst);
453 if (shifts[i] != shifts[0]
454 && code == TRUNC_DIV_EXPR
455 && !has_vector_shift)
456 use_pow2 = false;
458 if (mode == -2)
459 continue;
460 if (sign_p == UNSIGNED)
462 unsigned HOST_WIDE_INT mh;
463 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
465 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
466 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
467 return NULL_TREE;
469 if (d <= 1)
471 mode = -2;
472 continue;
475 /* Find a suitable multiplier and right shift count
476 instead of multiplying with D. */
477 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
479 /* If the suggested multiplier is more than SIZE bits, we can
480 do better for even divisors, using an initial right shift. */
481 if ((mh != 0 && (d & 1) == 0)
482 || (!has_vector_shift && pre_shift != -1))
484 if (has_vector_shift)
485 pre_shift = floor_log2 (d & -d);
486 else if (pre_shift == -1)
488 unsigned int j;
489 for (j = 0; j < nunits; j++)
491 tree cst2 = VECTOR_CST_ELT (op1, j);
492 unsigned HOST_WIDE_INT d2;
493 int this_pre_shift;
495 if (!tree_fits_uhwi_p (cst2))
496 return NULL_TREE;
497 d2 = tree_to_uhwi (cst2) & mask;
498 if (d2 == 0)
499 return NULL_TREE;
500 this_pre_shift = floor_log2 (d2 & -d2);
501 if (pre_shift == -1 || this_pre_shift < pre_shift)
502 pre_shift = this_pre_shift;
504 if (i != 0 && pre_shift != 0)
506 /* Restart. */
507 i = -1U;
508 mode = -1;
509 continue;
512 if (pre_shift != 0)
514 if ((d >> pre_shift) <= 1)
516 mode = -2;
517 continue;
519 mh = choose_multiplier (d >> pre_shift, prec,
520 prec - pre_shift,
521 &ml, &post_shift, &dummy_int);
522 gcc_assert (!mh);
523 pre_shifts[i] = pre_shift;
526 if (!mh)
527 this_mode = 0;
528 else
529 this_mode = 1;
531 else
533 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
534 unsigned HOST_WIDE_INT abs_d;
536 if (d == -1)
537 return NULL_TREE;
539 /* Since d might be INT_MIN, we have to cast to
540 unsigned HOST_WIDE_INT before negating to avoid
541 undefined signed overflow. */
542 abs_d = (d >= 0
543 ? (unsigned HOST_WIDE_INT) d
544 : - (unsigned HOST_WIDE_INT) d);
546 /* n rem d = n rem -d */
547 if (code == TRUNC_MOD_EXPR && d < 0)
548 d = abs_d;
549 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
551 /* This case is not handled correctly below. */
552 mode = -2;
553 continue;
555 if (abs_d <= 1)
557 mode = -2;
558 continue;
561 choose_multiplier (abs_d, prec, prec - 1, &ml,
562 &post_shift, &dummy_int);
563 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
565 this_mode = 4 + (d < 0);
566 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
568 else
569 this_mode = 2 + (d < 0);
571 mulc[i] = ml;
572 post_shifts[i] = post_shift;
573 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
574 || post_shift >= prec
575 || pre_shifts[i] >= prec)
576 this_mode = -2;
578 if (i == 0)
579 mode = this_mode;
580 else if (mode != this_mode)
581 mode = -2;
584 vec = XALLOCAVEC (tree, nunits);
586 if (use_pow2)
588 tree addend = NULL_TREE;
589 if (sign_p == SIGNED)
591 tree uns_type;
593 /* Both division and remainder sequences need
594 op0 < 0 ? mask : 0 computed. It can be either computed as
595 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
596 if none of the shifts is 0, or as the conditional. */
597 for (i = 0; i < nunits; i++)
598 if (shifts[i] == 0)
599 break;
600 uns_type
601 = build_vector_type (build_nonstandard_integer_type (prec, 1),
602 nunits);
603 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
605 for (i = 0; i < nunits; i++)
606 shift_temps[i] = prec - 1;
607 cur_op = add_rshift (gsi, type, op0, shift_temps);
608 if (cur_op != NULL_TREE)
610 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
611 uns_type, cur_op);
612 for (i = 0; i < nunits; i++)
613 shift_temps[i] = prec - shifts[i];
614 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
615 if (cur_op != NULL_TREE)
616 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
617 type, cur_op);
620 if (addend == NULL_TREE
621 && expand_vec_cond_expr_p (type, type))
623 tree zero, cst, cond, mask_type;
624 gimple *stmt;
626 mask_type = build_same_sized_truth_vector_type (type);
627 zero = build_zero_cst (type);
628 cond = build2 (LT_EXPR, mask_type, op0, zero);
629 for (i = 0; i < nunits; i++)
630 vec[i] = build_int_cst (TREE_TYPE (type),
631 ((unsigned HOST_WIDE_INT) 1
632 << shifts[i]) - 1);
633 cst = build_vector (type, vec);
634 addend = make_ssa_name (type);
635 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
636 cst, zero);
637 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
640 if (code == TRUNC_DIV_EXPR)
642 if (sign_p == UNSIGNED)
644 /* q = op0 >> shift; */
645 cur_op = add_rshift (gsi, type, op0, shifts);
646 if (cur_op != NULL_TREE)
647 return cur_op;
649 else if (addend != NULL_TREE)
651 /* t1 = op0 + addend;
652 q = t1 >> shift; */
653 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
654 if (op != unknown_optab
655 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
657 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
658 cur_op = add_rshift (gsi, type, cur_op, shifts);
659 if (cur_op != NULL_TREE)
660 return cur_op;
664 else
666 tree mask;
667 for (i = 0; i < nunits; i++)
668 vec[i] = build_int_cst (TREE_TYPE (type),
669 ((unsigned HOST_WIDE_INT) 1
670 << shifts[i]) - 1);
671 mask = build_vector (type, vec);
672 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
673 if (op != unknown_optab
674 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
676 if (sign_p == UNSIGNED)
677 /* r = op0 & mask; */
678 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
679 else if (addend != NULL_TREE)
681 /* t1 = op0 + addend;
682 t2 = t1 & mask;
683 r = t2 - addend; */
684 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
685 if (op != unknown_optab
686 && optab_handler (op, TYPE_MODE (type))
687 != CODE_FOR_nothing)
689 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
690 addend);
691 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
692 cur_op, mask);
693 op = optab_for_tree_code (MINUS_EXPR, type,
694 optab_default);
695 if (op != unknown_optab
696 && optab_handler (op, TYPE_MODE (type))
697 != CODE_FOR_nothing)
698 return gimplify_build2 (gsi, MINUS_EXPR, type,
699 cur_op, addend);
706 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
707 return NULL_TREE;
709 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
710 return NULL_TREE;
712 cur_op = op0;
714 switch (mode)
716 case 0:
717 gcc_assert (sign_p == UNSIGNED);
718 /* t1 = oprnd0 >> pre_shift;
719 t2 = t1 h* ml;
720 q = t2 >> post_shift; */
721 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
722 if (cur_op == NULL_TREE)
723 return NULL_TREE;
724 break;
725 case 1:
726 gcc_assert (sign_p == UNSIGNED);
727 for (i = 0; i < nunits; i++)
729 shift_temps[i] = 1;
730 post_shifts[i]--;
732 break;
733 case 2:
734 case 3:
735 case 4:
736 case 5:
737 gcc_assert (sign_p == SIGNED);
738 for (i = 0; i < nunits; i++)
739 shift_temps[i] = prec - 1;
740 break;
741 default:
742 return NULL_TREE;
745 for (i = 0; i < nunits; i++)
746 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
747 mulcst = build_vector (type, vec);
749 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
751 switch (mode)
753 case 0:
754 /* t1 = oprnd0 >> pre_shift;
755 t2 = t1 h* ml;
756 q = t2 >> post_shift; */
757 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
758 break;
759 case 1:
760 /* t1 = oprnd0 h* ml;
761 t2 = oprnd0 - t1;
762 t3 = t2 >> 1;
763 t4 = t1 + t3;
764 q = t4 >> (post_shift - 1); */
765 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
766 if (op == unknown_optab
767 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
768 return NULL_TREE;
769 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
770 tem = add_rshift (gsi, type, tem, shift_temps);
771 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
772 if (op == unknown_optab
773 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
774 return NULL_TREE;
775 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
776 cur_op = add_rshift (gsi, type, tem, post_shifts);
777 if (cur_op == NULL_TREE)
778 return NULL_TREE;
779 break;
780 case 2:
781 case 3:
782 case 4:
783 case 5:
784 /* t1 = oprnd0 h* ml;
785 t2 = t1; [ iff (mode & 2) != 0 ]
786 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
787 t3 = t2 >> post_shift;
788 t4 = oprnd0 >> (prec - 1);
789 q = t3 - t4; [ iff (mode & 1) == 0 ]
790 q = t4 - t3; [ iff (mode & 1) != 0 ] */
791 if ((mode & 2) == 0)
793 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
794 if (op == unknown_optab
795 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
796 return NULL_TREE;
797 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
799 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
800 if (cur_op == NULL_TREE)
801 return NULL_TREE;
802 tem = add_rshift (gsi, type, op0, shift_temps);
803 if (tem == NULL_TREE)
804 return NULL_TREE;
805 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
806 if (op == unknown_optab
807 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
808 return NULL_TREE;
809 if ((mode & 1) == 0)
810 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
811 else
812 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
813 break;
814 default:
815 gcc_unreachable ();
818 if (code == TRUNC_DIV_EXPR)
819 return cur_op;
821 /* We divided. Now finish by:
822 t1 = q * oprnd1;
823 r = oprnd0 - t1; */
824 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
825 if (op == unknown_optab
826 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
827 return NULL_TREE;
828 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
829 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
830 if (op == unknown_optab
831 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
832 return NULL_TREE;
833 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
836 /* Expand a vector condition to scalars, by using many conditions
837 on the vector's elements. */
838 static void
839 expand_vector_condition (gimple_stmt_iterator *gsi)
841 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
842 tree type = gimple_expr_type (stmt);
843 tree a = gimple_assign_rhs1 (stmt);
844 tree a1 = a;
845 tree a2 = NULL_TREE;
846 bool a_is_comparison = false;
847 tree b = gimple_assign_rhs2 (stmt);
848 tree c = gimple_assign_rhs3 (stmt);
849 vec<constructor_elt, va_gc> *v;
850 tree constr;
851 tree inner_type = TREE_TYPE (type);
852 tree cond_type = TREE_TYPE (TREE_TYPE (a));
853 tree comp_inner_type = cond_type;
854 tree width = TYPE_SIZE (inner_type);
855 tree index = bitsize_int (0);
856 int nunits = TYPE_VECTOR_SUBPARTS (type);
857 int i;
858 location_t loc = gimple_location (gsi_stmt (*gsi));
860 if (!is_gimple_val (a))
862 gcc_assert (COMPARISON_CLASS_P (a));
863 a_is_comparison = true;
864 a1 = TREE_OPERAND (a, 0);
865 a2 = TREE_OPERAND (a, 1);
866 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
869 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
870 return;
872 /* TODO: try and find a smaller vector type. */
874 warning_at (loc, OPT_Wvector_operation_performance,
875 "vector condition will be expanded piecewise");
877 vec_alloc (v, nunits);
878 for (i = 0; i < nunits;
879 i++, index = int_const_binop (PLUS_EXPR, index, width))
881 tree aa, result;
882 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
883 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
884 if (a_is_comparison)
886 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
887 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
888 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
890 else
891 aa = tree_vec_extract (gsi, cond_type, a, width, index);
892 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
893 constructor_elt ce = {NULL_TREE, result};
894 v->quick_push (ce);
897 constr = build_constructor (type, v);
898 gimple_assign_set_rhs_from_tree (gsi, constr);
899 update_stmt (gsi_stmt (*gsi));
902 static tree
903 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
904 gassign *assign, enum tree_code code)
906 machine_mode compute_mode = TYPE_MODE (compute_type);
908 /* If the compute mode is not a vector mode (hence we are not decomposing
909 a BLKmode vector to smaller, hardware-supported vectors), we may want
910 to expand the operations in parallel. */
911 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
912 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
913 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
914 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
915 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
916 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
917 switch (code)
919 case PLUS_EXPR:
920 case MINUS_EXPR:
921 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
922 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
923 gimple_assign_rhs1 (assign),
924 gimple_assign_rhs2 (assign), code);
925 break;
927 case NEGATE_EXPR:
928 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
929 return expand_vector_addition (gsi, do_unop, do_negate, type,
930 gimple_assign_rhs1 (assign),
931 NULL_TREE, code);
932 break;
934 case BIT_AND_EXPR:
935 case BIT_IOR_EXPR:
936 case BIT_XOR_EXPR:
937 return expand_vector_parallel (gsi, do_binop, type,
938 gimple_assign_rhs1 (assign),
939 gimple_assign_rhs2 (assign), code);
941 case BIT_NOT_EXPR:
942 return expand_vector_parallel (gsi, do_unop, type,
943 gimple_assign_rhs1 (assign),
944 NULL_TREE, code);
945 case EQ_EXPR:
946 case NE_EXPR:
947 case GT_EXPR:
948 case LT_EXPR:
949 case GE_EXPR:
950 case LE_EXPR:
951 case UNEQ_EXPR:
952 case UNGT_EXPR:
953 case UNLT_EXPR:
954 case UNGE_EXPR:
955 case UNLE_EXPR:
956 case LTGT_EXPR:
957 case ORDERED_EXPR:
958 case UNORDERED_EXPR:
960 tree rhs1 = gimple_assign_rhs1 (assign);
961 tree rhs2 = gimple_assign_rhs2 (assign);
963 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
966 case TRUNC_DIV_EXPR:
967 case TRUNC_MOD_EXPR:
969 tree rhs1 = gimple_assign_rhs1 (assign);
970 tree rhs2 = gimple_assign_rhs2 (assign);
971 tree ret;
973 if (!optimize
974 || !VECTOR_INTEGER_TYPE_P (type)
975 || TREE_CODE (rhs2) != VECTOR_CST
976 || !VECTOR_MODE_P (TYPE_MODE (type)))
977 break;
979 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
980 if (ret != NULL_TREE)
981 return ret;
982 break;
985 default:
986 break;
989 if (TREE_CODE_CLASS (code) == tcc_unary)
990 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
991 gimple_assign_rhs1 (assign),
992 NULL_TREE, code);
993 else
994 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
995 gimple_assign_rhs1 (assign),
996 gimple_assign_rhs2 (assign), code);
999 /* Try to optimize
1000 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1001 style stmts into:
1002 _9 = { b_7, b_7, b_7, b_7 };
1003 a_5 = _9 + { 0, 3, 6, 9 };
1004 because vector splat operation is usually more efficient
1005 than piecewise initialization of the vector. */
1007 static void
1008 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1010 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1011 tree lhs = gimple_assign_lhs (stmt);
1012 tree rhs = gimple_assign_rhs1 (stmt);
1013 tree type = TREE_TYPE (rhs);
1014 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1015 bool all_same = true;
1016 constructor_elt *elt;
1017 tree *cst;
1018 gimple *g;
1019 tree base = NULL_TREE;
1020 optab op;
1022 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1023 return;
1024 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1025 if (op == unknown_optab
1026 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1027 return;
1028 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1029 if (TREE_CODE (elt->value) != SSA_NAME
1030 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1031 return;
1032 else
1034 tree this_base = elt->value;
1035 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1036 all_same = false;
1037 for (j = 0; j < nelts + 1; j++)
1039 g = SSA_NAME_DEF_STMT (this_base);
1040 if (is_gimple_assign (g)
1041 && gimple_assign_rhs_code (g) == PLUS_EXPR
1042 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1043 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1044 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1045 this_base = gimple_assign_rhs1 (g);
1046 else
1047 break;
1049 if (i == 0)
1050 base = this_base;
1051 else if (this_base != base)
1052 return;
1054 if (all_same)
1055 return;
1056 cst = XALLOCAVEC (tree, nelts);
1057 for (i = 0; i < nelts; i++)
1059 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1060 cst[i] = build_zero_cst (TREE_TYPE (base));
1061 while (this_base != base)
1063 g = SSA_NAME_DEF_STMT (this_base);
1064 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1065 cst[i], gimple_assign_rhs2 (g));
1066 if (cst[i] == NULL_TREE
1067 || TREE_CODE (cst[i]) != INTEGER_CST
1068 || TREE_OVERFLOW (cst[i]))
1069 return;
1070 this_base = gimple_assign_rhs1 (g);
1073 for (i = 0; i < nelts; i++)
1074 CONSTRUCTOR_ELT (rhs, i)->value = base;
1075 g = gimple_build_assign (make_ssa_name (type), rhs);
1076 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1077 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1078 build_vector (type, cst));
1079 gsi_replace (gsi, g, false);
1082 /* Return a type for the widest vector mode whose components are of type
1083 TYPE, or NULL_TREE if none is found. */
1085 static tree
1086 type_for_widest_vector_mode (tree type, optab op)
1088 machine_mode inner_mode = TYPE_MODE (type);
1089 machine_mode best_mode = VOIDmode, mode;
1090 int best_nunits = 0;
1092 if (SCALAR_FLOAT_MODE_P (inner_mode))
1093 mode = MIN_MODE_VECTOR_FLOAT;
1094 else if (SCALAR_FRACT_MODE_P (inner_mode))
1095 mode = MIN_MODE_VECTOR_FRACT;
1096 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1097 mode = MIN_MODE_VECTOR_UFRACT;
1098 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1099 mode = MIN_MODE_VECTOR_ACCUM;
1100 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1101 mode = MIN_MODE_VECTOR_UACCUM;
1102 else
1103 mode = MIN_MODE_VECTOR_INT;
1105 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1106 if (GET_MODE_INNER (mode) == inner_mode
1107 && GET_MODE_NUNITS (mode) > best_nunits
1108 && optab_handler (op, mode) != CODE_FOR_nothing)
1109 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1111 if (best_mode == VOIDmode)
1112 return NULL_TREE;
1113 else
1114 return build_vector_type_for_mode (type, best_mode);
1118 /* Build a reference to the element of the vector VECT. Function
1119 returns either the element itself, either BIT_FIELD_REF, or an
1120 ARRAY_REF expression.
1122 GSI is required to insert temporary variables while building a
1123 refernece to the element of the vector VECT.
1125 PTMPVEC is a pointer to the temporary variable for caching
1126 purposes. In case when PTMPVEC is NULL new temporary variable
1127 will be created. */
1128 static tree
1129 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1131 tree vect_type, vect_elt_type;
1132 gimple *asgn;
1133 tree tmpvec;
1134 tree arraytype;
1135 bool need_asgn = true;
1136 unsigned int elements;
1138 vect_type = TREE_TYPE (vect);
1139 vect_elt_type = TREE_TYPE (vect_type);
1140 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1142 if (TREE_CODE (idx) == INTEGER_CST)
1144 unsigned HOST_WIDE_INT index;
1146 /* Given that we're about to compute a binary modulus,
1147 we don't care about the high bits of the value. */
1148 index = TREE_INT_CST_LOW (idx);
1149 if (!tree_fits_uhwi_p (idx) || index >= elements)
1151 index &= elements - 1;
1152 idx = build_int_cst (TREE_TYPE (idx), index);
1155 /* When lowering a vector statement sequence do some easy
1156 simplification by looking through intermediate vector results. */
1157 if (TREE_CODE (vect) == SSA_NAME)
1159 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1160 if (is_gimple_assign (def_stmt)
1161 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1162 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1163 vect = gimple_assign_rhs1 (def_stmt);
1166 if (TREE_CODE (vect) == VECTOR_CST)
1167 return VECTOR_CST_ELT (vect, index);
1168 else if (TREE_CODE (vect) == CONSTRUCTOR
1169 && (CONSTRUCTOR_NELTS (vect) == 0
1170 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1171 != VECTOR_TYPE))
1173 if (index < CONSTRUCTOR_NELTS (vect))
1174 return CONSTRUCTOR_ELT (vect, index)->value;
1175 return build_zero_cst (vect_elt_type);
1177 else
1179 tree size = TYPE_SIZE (vect_elt_type);
1180 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1181 size);
1182 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1186 if (!ptmpvec)
1187 tmpvec = create_tmp_var (vect_type, "vectmp");
1188 else if (!*ptmpvec)
1189 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1190 else
1192 tmpvec = *ptmpvec;
1193 need_asgn = false;
1196 if (need_asgn)
1198 TREE_ADDRESSABLE (tmpvec) = 1;
1199 asgn = gimple_build_assign (tmpvec, vect);
1200 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1203 arraytype = build_array_type_nelts (vect_elt_type, elements);
1204 return build4 (ARRAY_REF, vect_elt_type,
1205 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1206 idx, NULL_TREE, NULL_TREE);
1209 /* Check if VEC_PERM_EXPR within the given setting is supported
1210 by hardware, or lower it piecewise.
1212 When VEC_PERM_EXPR has the same first and second operands:
1213 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1214 {v0[mask[0]], v0[mask[1]], ...}
1215 MASK and V0 must have the same number of elements.
1217 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1218 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1219 V0 and V1 must have the same type. MASK, V0, V1 must have the
1220 same number of arguments. */
1222 static void
1223 lower_vec_perm (gimple_stmt_iterator *gsi)
1225 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1226 tree mask = gimple_assign_rhs3 (stmt);
1227 tree vec0 = gimple_assign_rhs1 (stmt);
1228 tree vec1 = gimple_assign_rhs2 (stmt);
1229 tree vect_type = TREE_TYPE (vec0);
1230 tree mask_type = TREE_TYPE (mask);
1231 tree vect_elt_type = TREE_TYPE (vect_type);
1232 tree mask_elt_type = TREE_TYPE (mask_type);
1233 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1234 vec<constructor_elt, va_gc> *v;
1235 tree constr, t, si, i_val;
1236 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1237 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1238 location_t loc = gimple_location (gsi_stmt (*gsi));
1239 unsigned i;
1241 if (TREE_CODE (mask) == SSA_NAME)
1243 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1244 if (is_gimple_assign (def_stmt)
1245 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1246 mask = gimple_assign_rhs1 (def_stmt);
1249 if (TREE_CODE (mask) == VECTOR_CST)
1251 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1253 for (i = 0; i < elements; ++i)
1254 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1255 & (2 * elements - 1));
1257 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1259 gimple_assign_set_rhs3 (stmt, mask);
1260 update_stmt (stmt);
1261 return;
1264 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1265 return;
1267 warning_at (loc, OPT_Wvector_operation_performance,
1268 "vector shuffling operation will be expanded piecewise");
1270 vec_alloc (v, elements);
1271 for (i = 0; i < elements; i++)
1273 si = size_int (i);
1274 i_val = vector_element (gsi, mask, si, &masktmp);
1276 if (TREE_CODE (i_val) == INTEGER_CST)
1278 unsigned HOST_WIDE_INT index;
1280 index = TREE_INT_CST_LOW (i_val);
1281 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1282 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1284 if (two_operand_p && (index & elements) != 0)
1285 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1286 else
1287 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1289 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1290 true, GSI_SAME_STMT);
1292 else
1294 tree cond = NULL_TREE, v0_val;
1296 if (two_operand_p)
1298 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1299 build_int_cst (mask_elt_type, elements));
1300 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1301 true, GSI_SAME_STMT);
1304 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1305 build_int_cst (mask_elt_type, elements - 1));
1306 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1307 true, GSI_SAME_STMT);
1309 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1310 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1311 true, GSI_SAME_STMT);
1313 if (two_operand_p)
1315 tree v1_val;
1317 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1318 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1319 true, GSI_SAME_STMT);
1321 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1322 cond, build_zero_cst (mask_elt_type));
1323 cond = fold_build3 (COND_EXPR, vect_elt_type,
1324 cond, v0_val, v1_val);
1325 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1326 true, GSI_SAME_STMT);
1328 else
1329 t = v0_val;
1332 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1335 constr = build_constructor (vect_type, v);
1336 gimple_assign_set_rhs_from_tree (gsi, constr);
1337 update_stmt (gsi_stmt (*gsi));
1340 /* If OP is a uniform vector return the element it is a splat from. */
1342 static tree
1343 ssa_uniform_vector_p (tree op)
1345 if (TREE_CODE (op) == VECTOR_CST
1346 || TREE_CODE (op) == CONSTRUCTOR)
1347 return uniform_vector_p (op);
1348 if (TREE_CODE (op) == SSA_NAME)
1350 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1351 if (gimple_assign_single_p (def_stmt))
1352 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1354 return NULL_TREE;
1357 /* Return type in which CODE operation with optab OP can be
1358 computed. */
1360 static tree
1361 get_compute_type (enum tree_code code, optab op, tree type)
1363 /* For very wide vectors, try using a smaller vector mode. */
1364 tree compute_type = type;
1365 if (op
1366 && (!VECTOR_MODE_P (TYPE_MODE (type))
1367 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1369 tree vector_compute_type
1370 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1371 if (vector_compute_type != NULL_TREE
1372 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1373 < TYPE_VECTOR_SUBPARTS (compute_type))
1374 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1375 != CODE_FOR_nothing))
1376 compute_type = vector_compute_type;
1379 /* If we are breaking a BLKmode vector into smaller pieces,
1380 type_for_widest_vector_mode has already looked into the optab,
1381 so skip these checks. */
1382 if (compute_type == type)
1384 machine_mode compute_mode = TYPE_MODE (compute_type);
1385 if (VECTOR_MODE_P (compute_mode))
1387 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1388 return compute_type;
1389 if (code == MULT_HIGHPART_EXPR
1390 && can_mult_highpart_p (compute_mode,
1391 TYPE_UNSIGNED (compute_type)))
1392 return compute_type;
1394 /* There is no operation in hardware, so fall back to scalars. */
1395 compute_type = TREE_TYPE (type);
1398 return compute_type;
1401 /* Helper function of expand_vector_operations_1. Return number of
1402 vector elements for vector types or 1 for other types. */
1404 static inline int
1405 count_type_subparts (tree type)
1407 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1410 static tree
1411 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1412 tree bitpos, tree bitsize, enum tree_code code,
1413 tree type ATTRIBUTE_UNUSED)
1415 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1416 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1417 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1418 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1419 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1420 return gimplify_build3 (gsi, code, inner_type, cond, a, b);
1423 /* Expand a vector COND_EXPR to scalars, piecewise. */
1424 static void
1425 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1427 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1428 tree type = gimple_expr_type (stmt);
1429 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1430 machine_mode compute_mode = TYPE_MODE (compute_type);
1431 gcc_assert (compute_mode != BLKmode);
1432 tree lhs = gimple_assign_lhs (stmt);
1433 tree rhs2 = gimple_assign_rhs2 (stmt);
1434 tree rhs3 = gimple_assign_rhs3 (stmt);
1435 tree new_rhs;
1437 /* If the compute mode is not a vector mode (hence we are not decomposing
1438 a BLKmode vector to smaller, hardware-supported vectors), we may want
1439 to expand the operations in parallel. */
1440 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1441 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1442 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1443 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1444 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1445 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1446 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1447 COND_EXPR);
1448 else
1449 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1450 rhs2, rhs3, COND_EXPR);
1451 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1452 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1453 new_rhs);
1455 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1456 way to do it is change expand_vector_operation and its callees to
1457 return a tree_code, RHS1 and RHS2 instead of a tree. */
1458 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1459 update_stmt (gsi_stmt (*gsi));
1462 /* Process one statement. If we identify a vector operation, expand it. */
1464 static void
1465 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1467 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1468 enum tree_code code;
1469 optab op = unknown_optab;
1470 enum gimple_rhs_class rhs_class;
1471 tree new_rhs;
1473 /* Only consider code == GIMPLE_ASSIGN. */
1474 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1475 if (!stmt)
1476 return;
1478 code = gimple_assign_rhs_code (stmt);
1479 rhs_class = get_gimple_rhs_class (code);
1480 lhs = gimple_assign_lhs (stmt);
1482 if (code == VEC_PERM_EXPR)
1484 lower_vec_perm (gsi);
1485 return;
1488 if (code == VEC_COND_EXPR)
1490 expand_vector_condition (gsi);
1491 return;
1494 if (code == COND_EXPR
1495 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1496 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1498 expand_vector_scalar_condition (gsi);
1499 return;
1502 if (code == CONSTRUCTOR
1503 && TREE_CODE (lhs) == SSA_NAME
1504 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1505 && !gimple_clobber_p (stmt)
1506 && optimize)
1508 optimize_vector_constructor (gsi);
1509 return;
1512 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1513 return;
1515 rhs1 = gimple_assign_rhs1 (stmt);
1516 type = gimple_expr_type (stmt);
1517 if (rhs_class == GIMPLE_BINARY_RHS)
1518 rhs2 = gimple_assign_rhs2 (stmt);
1520 if (TREE_CODE (type) != VECTOR_TYPE)
1521 return;
1523 /* If the vector operation is operating on all same vector elements
1524 implement it with a scalar operation and a splat if the target
1525 supports the scalar operation. */
1526 tree srhs1, srhs2 = NULL_TREE;
1527 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
1528 && (rhs2 == NULL_TREE
1529 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1530 /* As we query direct optabs restrict to non-convert operations. */
1531 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
1533 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
1534 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
1535 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
1537 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
1538 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
1539 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
1540 gimple_assign_set_rhs_from_tree (gsi,
1541 build_vector_from_val (type, slhs));
1542 update_stmt (stmt);
1543 return;
1547 /* A scalar operation pretending to be a vector one. */
1548 if (VECTOR_BOOLEAN_TYPE_P (type)
1549 && !VECTOR_MODE_P (TYPE_MODE (type))
1550 && TYPE_MODE (type) != BLKmode)
1551 return;
1553 if (CONVERT_EXPR_CODE_P (code)
1554 || code == FLOAT_EXPR
1555 || code == FIX_TRUNC_EXPR
1556 || code == VIEW_CONVERT_EXPR)
1557 return;
1559 /* The signedness is determined from input argument. */
1560 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1561 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1562 type = TREE_TYPE (rhs1);
1564 /* For widening/narrowing vector operations, the relevant type is of the
1565 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1566 calculated in the same way above. */
1567 if (code == WIDEN_SUM_EXPR
1568 || code == VEC_WIDEN_MULT_HI_EXPR
1569 || code == VEC_WIDEN_MULT_LO_EXPR
1570 || code == VEC_WIDEN_MULT_EVEN_EXPR
1571 || code == VEC_WIDEN_MULT_ODD_EXPR
1572 || code == VEC_UNPACK_HI_EXPR
1573 || code == VEC_UNPACK_LO_EXPR
1574 || code == VEC_PACK_TRUNC_EXPR
1575 || code == VEC_PACK_SAT_EXPR
1576 || code == VEC_PACK_FIX_TRUNC_EXPR
1577 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1578 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1579 type = TREE_TYPE (rhs1);
1581 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1582 scalar */
1583 if (code == LSHIFT_EXPR
1584 || code == RSHIFT_EXPR
1585 || code == LROTATE_EXPR
1586 || code == RROTATE_EXPR)
1588 optab opv;
1590 /* Check whether we have vector <op> {x,x,x,x} where x
1591 could be a scalar variable or a constant. Transform
1592 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1593 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1595 tree first;
1597 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1599 gimple_assign_set_rhs2 (stmt, first);
1600 update_stmt (stmt);
1601 rhs2 = first;
1605 opv = optab_for_tree_code (code, type, optab_vector);
1606 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1607 op = opv;
1608 else
1610 op = optab_for_tree_code (code, type, optab_scalar);
1612 compute_type = get_compute_type (code, op, type);
1613 if (compute_type == type)
1614 return;
1615 /* The rtl expander will expand vector/scalar as vector/vector
1616 if necessary. Pick one with wider vector type. */
1617 tree compute_vtype = get_compute_type (code, opv, type);
1618 if (count_type_subparts (compute_vtype)
1619 > count_type_subparts (compute_type))
1621 compute_type = compute_vtype;
1622 op = opv;
1626 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1628 if (compute_type == NULL_TREE)
1629 compute_type = get_compute_type (code, op, type);
1630 if (compute_type == type)
1631 return;
1632 /* Before splitting vector rotates into scalar rotates,
1633 see if we can't use vector shifts and BIT_IOR_EXPR
1634 instead. For vector by vector rotates we'd also
1635 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1636 for now, fold doesn't seem to create such rotates anyway. */
1637 if (compute_type == TREE_TYPE (type)
1638 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1640 optab oplv = vashl_optab, opl = ashl_optab;
1641 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1642 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1643 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1644 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1645 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1646 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1647 /* The rtl expander will expand vector/scalar as vector/vector
1648 if necessary. Pick one with wider vector type. */
1649 if (count_type_subparts (compute_lvtype)
1650 > count_type_subparts (compute_ltype))
1652 compute_ltype = compute_lvtype;
1653 opl = oplv;
1655 if (count_type_subparts (compute_rvtype)
1656 > count_type_subparts (compute_rtype))
1658 compute_rtype = compute_rvtype;
1659 opr = oprv;
1661 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1662 BIT_IOR_EXPR. */
1663 compute_type = compute_ltype;
1664 if (count_type_subparts (compute_type)
1665 > count_type_subparts (compute_rtype))
1666 compute_type = compute_rtype;
1667 if (count_type_subparts (compute_type)
1668 > count_type_subparts (compute_otype))
1669 compute_type = compute_otype;
1670 /* Verify all 3 operations can be performed in that type. */
1671 if (compute_type != TREE_TYPE (type))
1673 if (optab_handler (opl, TYPE_MODE (compute_type))
1674 == CODE_FOR_nothing
1675 || optab_handler (opr, TYPE_MODE (compute_type))
1676 == CODE_FOR_nothing
1677 || optab_handler (opo, TYPE_MODE (compute_type))
1678 == CODE_FOR_nothing)
1679 compute_type = TREE_TYPE (type);
1684 else
1685 op = optab_for_tree_code (code, type, optab_default);
1687 /* Optabs will try converting a negation into a subtraction, so
1688 look for it as well. TODO: negation of floating-point vectors
1689 might be turned into an exclusive OR toggling the sign bit. */
1690 if (op == unknown_optab
1691 && code == NEGATE_EXPR
1692 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1693 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1695 if (compute_type == NULL_TREE)
1696 compute_type = get_compute_type (code, op, type);
1697 if (compute_type == type)
1698 return;
1700 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1702 /* Leave expression untouched for later expansion. */
1703 if (new_rhs == NULL_TREE)
1704 return;
1706 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1707 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1708 new_rhs);
1710 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1711 way to do it is change expand_vector_operation and its callees to
1712 return a tree_code, RHS1 and RHS2 instead of a tree. */
1713 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1714 update_stmt (gsi_stmt (*gsi));
1717 /* Use this to lower vector operations introduced by the vectorizer,
1718 if it may need the bit-twiddling tricks implemented in this file. */
1720 static unsigned int
1721 expand_vector_operations (void)
1723 gimple_stmt_iterator gsi;
1724 basic_block bb;
1725 bool cfg_changed = false;
1727 FOR_EACH_BB_FN (bb, cfun)
1729 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1731 expand_vector_operations_1 (&gsi);
1732 /* ??? If we do not cleanup EH then we will ICE in
1733 verification. But in reality we have created wrong-code
1734 as we did not properly transition EH info and edges to
1735 the piecewise computations. */
1736 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1737 && gimple_purge_dead_eh_edges (bb))
1738 cfg_changed = true;
1742 return cfg_changed ? TODO_cleanup_cfg : 0;
1745 namespace {
1747 const pass_data pass_data_lower_vector =
1749 GIMPLE_PASS, /* type */
1750 "veclower", /* name */
1751 OPTGROUP_VEC, /* optinfo_flags */
1752 TV_NONE, /* tv_id */
1753 PROP_cfg, /* properties_required */
1754 PROP_gimple_lvec, /* properties_provided */
1755 0, /* properties_destroyed */
1756 0, /* todo_flags_start */
1757 TODO_update_ssa, /* todo_flags_finish */
1760 class pass_lower_vector : public gimple_opt_pass
1762 public:
1763 pass_lower_vector (gcc::context *ctxt)
1764 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1767 /* opt_pass methods: */
1768 virtual bool gate (function *fun)
1770 return !(fun->curr_properties & PROP_gimple_lvec);
1773 virtual unsigned int execute (function *)
1775 return expand_vector_operations ();
1778 }; // class pass_lower_vector
1780 } // anon namespace
1782 gimple_opt_pass *
1783 make_pass_lower_vector (gcc::context *ctxt)
1785 return new pass_lower_vector (ctxt);
1788 namespace {
1790 const pass_data pass_data_lower_vector_ssa =
1792 GIMPLE_PASS, /* type */
1793 "veclower2", /* name */
1794 OPTGROUP_VEC, /* optinfo_flags */
1795 TV_NONE, /* tv_id */
1796 PROP_cfg, /* properties_required */
1797 PROP_gimple_lvec, /* properties_provided */
1798 0, /* properties_destroyed */
1799 0, /* todo_flags_start */
1800 ( TODO_update_ssa
1801 | TODO_cleanup_cfg ), /* todo_flags_finish */
1804 class pass_lower_vector_ssa : public gimple_opt_pass
1806 public:
1807 pass_lower_vector_ssa (gcc::context *ctxt)
1808 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1811 /* opt_pass methods: */
1812 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1813 virtual unsigned int execute (function *)
1815 return expand_vector_operations ();
1818 }; // class pass_lower_vector_ssa
1820 } // anon namespace
1822 gimple_opt_pass *
1823 make_pass_lower_vector_ssa (gcc::context *ctxt)
1825 return new pass_lower_vector_ssa (ctxt);
1828 #include "gt-tree-vect-generic.h"