/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alias.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "options.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "diagnostic.h"
#include "target.h"
#include "expmed.h"
#include "insn-codes.h"
#include "optabs-tree.h"

static void expand_vector_operations_1 (gimple_stmt_iterator *);

/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
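/* For example, with an 8-bit INNER_TYPE, VALUE 0x7f and a 32-bit TYPE,
   the division ~0 / 0xff below yields one copy of 0x01 in every byte of
   the host wide int, so multiplying by (VALUE & 0xff) replicates the
   byte; truncating to TYPE's precision then gives the constant
   0x7f7f7f7f.  */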
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
    / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements.

   The returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
		(GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}

164 /* Expand vector addition to scalars. This does bit twiddling
165 in order to increase parallelism:
167 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
168 (a ^ b) & 0x80808080
170 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
171 (a ^ ~b) & 0x80808080
173 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
175 This optimization should be done only if 4 vector items or more
176 fit into a word. */
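/* For instance, with 8-bit lanes packed in a 16-bit word, a = 0x01ff and
   b = 0x0101: the masked sum (0x017f + 0x0101) is 0x0280, the sign-bit
   correction ((a ^ b) & 0x8080) is 0x0080, and XORing the two gives
   0x0200, which matches the lane-wise wrapping sums 0x01 + 0x01 = 0x02
   and 0xff + 0x01 = 0x00.  */
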
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  machine_mode mode;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
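/* E.g. on a 32-bit target, adding two V8QI vectors qualifies: four QImode
   items fit into a 32-bit word and the vectors hold eight items, so the
   addition is done a word at a time using the bit-twiddling
   do_plus_minus above.  */
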
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, vec));
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
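/* For instance, an unsigned V16QI division by { 3, 3, ... } can be
   computed as q = (n h* 0xab) >> 1, where h* is the high half of the
   8x8->16 bit product: 0xab / 2^9 approximates 1/3 closely enough for
   every 8-bit dividend.  The analysis below picks such multipliers and
   shift counts element by element.  */
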
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are powers of two
     and it is possible to expand the operation using shifts (or for
     remainder using masking).  Additionally compute the multiplicative
     constants and pre and post shifts if the division is to be expanded
     using widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = floor_log2 (d & -d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type))
	    {
	      tree zero, cst, cond;
	      gimple *stmt;

	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					((unsigned HOST_WIDE_INT) 1
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    ((unsigned HOST_WIDE_INT) 1
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
      return NULL_TREE;
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}

      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
	  aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  tree *cst;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  cst = XALLOCAVEC (tree, nelts);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      cst[i] = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
				cst[i], gimple_assign_rhs2 (g));
	  if (cst[i] == NULL_TREE
	      || TREE_CODE (cst[i]) != INTEGER_CST
	      || TREE_OVERFLOW (cst[i]))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   build_vector (type, cst));
  gsi_replace (gsi, g, false);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}

/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, or a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
	 simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of elements.  */
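/* For example, VEC_PERM_EXPR <{1,2,3,4}, {5,6,7,8}, {0,5,2,7}> selects
   elements 0 and 2 from the first operand and elements 5-4=1 and 7-4=3
   from the second, yielding {1,6,3,8}.  */
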
static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, cond, a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.  */
static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;
	  gimple *def_stmt;

	  if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"