2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / tree-vect-generic.c
blob822ddfb7517e759dccf53a1b840d04b97bbf591d
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "input.h"
24 #include "alias.h"
25 #include "symtab.h"
26 #include "options.h"
27 #include "tree.h"
28 #include "fold-const.h"
29 #include "stor-layout.h"
30 #include "tm.h"
31 #include "langhooks.h"
32 #include "predict.h"
33 #include "hard-reg-set.h"
34 #include "function.h"
35 #include "dominance.h"
36 #include "cfg.h"
37 #include "basic-block.h"
38 #include "tree-ssa-alias.h"
39 #include "internal-fn.h"
40 #include "tree-eh.h"
41 #include "gimple-expr.h"
42 #include "is-a.h"
43 #include "gimple.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-ssa.h"
47 #include "tree-cfg.h"
48 #include "stringpool.h"
49 #include "tree-ssanames.h"
50 #include "tree-iterator.h"
51 #include "tree-pass.h"
52 #include "flags.h"
53 #include "diagnostic.h"
54 #include "target.h"
56 /* Need to include rtl.h, expr.h, etc. for optabs. */
57 #include "rtl.h"
58 #include "insn-config.h"
59 #include "expmed.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "calls.h"
63 #include "emit-rtl.h"
64 #include "varasm.h"
65 #include "stmt.h"
66 #include "expr.h"
67 #include "insn-codes.h"
68 #include "optabs.h"
71 static void expand_vector_operations_1 (gimple_stmt_iterator *);
74 /* Build a constant of type TYPE, made of VALUE's bits replicated
75 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
76 static tree
77 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
79 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
80 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
81 / HOST_BITS_PER_WIDE_INT;
82 unsigned HOST_WIDE_INT low, mask;
83 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
84 int i;
86 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
88 if (width == HOST_BITS_PER_WIDE_INT)
89 low = value;
90 else
92 mask = ((HOST_WIDE_INT)1 << width) - 1;
93 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
96 for (i = 0; i < n; i++)
97 a[i] = low;
99 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
100 return wide_int_to_tree
101 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
104 static GTY(()) tree vector_inner_type;
105 static GTY(()) tree vector_last_type;
106 static GTY(()) int vector_last_nunits;
108 /* Return a suitable vector types made of SUBPARTS units each of mode
109 "word_mode" (the global variable). */
110 static tree
111 build_word_mode_vector_type (int nunits)
113 if (!vector_inner_type)
114 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
115 else if (vector_last_nunits == nunits)
117 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
118 return vector_last_type;
121 /* We build a new type, but we canonicalize it nevertheless,
122 because it still saves some memory. */
123 vector_last_nunits = nunits;
124 vector_last_type = type_hash_canon (nunits,
125 build_vector_type (vector_inner_type,
126 nunits));
127 return vector_last_type;
130 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
131 tree, tree, tree, tree, tree, enum tree_code);
133 static inline tree
134 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
135 tree t, tree bitsize, tree bitpos)
137 if (bitpos)
138 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
139 else
140 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
143 static tree
144 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
145 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
146 enum tree_code code)
148 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
149 return gimplify_build1 (gsi, code, inner_type, a);
152 static tree
153 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
154 tree bitpos, tree bitsize, enum tree_code code)
156 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
157 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
158 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
159 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
160 return gimplify_build2 (gsi, code, inner_type, a, b);
163 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
165 INNER_TYPE is the type of A and B elements
167 returned expression is of signed integer type with the
168 size equal to the size of INNER_TYPE. */
169 static tree
170 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
171 tree bitpos, tree bitsize, enum tree_code code)
173 tree comp_type;
175 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
176 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
178 comp_type = build_nonstandard_integer_type
179 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
181 return gimplify_build3 (gsi, COND_EXPR, comp_type,
182 fold_build2 (code, boolean_type_node, a, b),
183 build_int_cst (comp_type, -1),
184 build_int_cst (comp_type, 0));
187 /* Expand vector addition to scalars. This does bit twiddling
188 in order to increase parallelism:
190 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
191 (a ^ b) & 0x80808080
193 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
194 (a ^ ~b) & 0x80808080
196 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
198 This optimization should be done only if 4 vector items or more
199 fit into a word. */
200 static tree
201 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
202 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
203 enum tree_code code)
205 tree inner_type = TREE_TYPE (TREE_TYPE (a));
206 unsigned HOST_WIDE_INT max;
207 tree low_bits, high_bits, a_low, b_low, result_low, signs;
209 max = GET_MODE_MASK (TYPE_MODE (inner_type));
210 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
211 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
213 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
214 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
216 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
217 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
218 if (code == PLUS_EXPR)
219 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
220 else
222 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
223 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
226 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
227 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
228 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
231 static tree
232 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
233 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
234 tree bitsize ATTRIBUTE_UNUSED,
235 enum tree_code code ATTRIBUTE_UNUSED)
237 tree inner_type = TREE_TYPE (TREE_TYPE (b));
238 HOST_WIDE_INT max;
239 tree low_bits, high_bits, b_low, result_low, signs;
241 max = GET_MODE_MASK (TYPE_MODE (inner_type));
242 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
243 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
245 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
247 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
248 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
249 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
250 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
251 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
254 /* Expand a vector operation to scalars, by using many operations
255 whose type is the vector type's inner type. */
256 static tree
257 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
258 tree type, tree inner_type,
259 tree a, tree b, enum tree_code code)
261 vec<constructor_elt, va_gc> *v;
262 tree part_width = TYPE_SIZE (inner_type);
263 tree index = bitsize_int (0);
264 int nunits = TYPE_VECTOR_SUBPARTS (type);
265 int delta = tree_to_uhwi (part_width)
266 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
267 int i;
268 location_t loc = gimple_location (gsi_stmt (*gsi));
270 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
271 warning_at (loc, OPT_Wvector_operation_performance,
272 "vector operation will be expanded piecewise");
273 else
274 warning_at (loc, OPT_Wvector_operation_performance,
275 "vector operation will be expanded in parallel");
277 vec_alloc (v, (nunits + delta - 1) / delta);
278 for (i = 0; i < nunits;
279 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
281 tree result = f (gsi, inner_type, a, b, index, part_width, code);
282 constructor_elt ce = {NULL_TREE, result};
283 v->quick_push (ce);
286 return build_constructor (type, v);
289 /* Expand a vector operation to scalars with the freedom to use
290 a scalar integer type, or to use a different size for the items
291 in the vector type. */
292 static tree
293 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
294 tree a, tree b,
295 enum tree_code code)
297 tree result, compute_type;
298 machine_mode mode;
299 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
300 location_t loc = gimple_location (gsi_stmt (*gsi));
302 /* We have three strategies. If the type is already correct, just do
303 the operation an element at a time. Else, if the vector is wider than
304 one word, do it a word at a time; finally, if the vector is smaller
305 than one word, do it as a scalar. */
306 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
307 return expand_vector_piecewise (gsi, f,
308 type, TREE_TYPE (type),
309 a, b, code);
310 else if (n_words > 1)
312 tree word_type = build_word_mode_vector_type (n_words);
313 result = expand_vector_piecewise (gsi, f,
314 word_type, TREE_TYPE (word_type),
315 a, b, code);
316 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
317 GSI_SAME_STMT);
319 else
321 /* Use a single scalar operation with a mode no wider than word_mode. */
322 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
323 compute_type = lang_hooks.types.type_for_mode (mode, 1);
324 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
325 warning_at (loc, OPT_Wvector_operation_performance,
326 "vector operation will be expanded with a "
327 "single scalar operation");
330 return result;
333 /* Expand a vector operation to scalars; for integer types we can use
334 special bit twiddling tricks to do the sums a word at a time, using
335 function F_PARALLEL instead of F. These tricks are done only if
336 they can process at least four items, that is, only if the vector
337 holds at least four items and if a word can hold four items. */
338 static tree
339 expand_vector_addition (gimple_stmt_iterator *gsi,
340 elem_op_func f, elem_op_func f_parallel,
341 tree type, tree a, tree b, enum tree_code code)
343 int parts_per_word = UNITS_PER_WORD
344 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
346 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
347 && parts_per_word >= 4
348 && TYPE_VECTOR_SUBPARTS (type) >= 4)
349 return expand_vector_parallel (gsi, f_parallel,
350 type, a, b, code);
351 else
352 return expand_vector_piecewise (gsi, f,
353 type, TREE_TYPE (type),
354 a, b, code);
357 /* Try to expand vector comparison expression OP0 CODE OP1 by
358 querying optab if the following expression:
359 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
360 can be expanded. */
361 static tree
362 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
363 tree op1, enum tree_code code)
365 tree t;
366 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
367 t = expand_vector_piecewise (gsi, do_compare, type,
368 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
369 else
370 t = NULL_TREE;
372 return t;
375 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
376 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
377 the result if successful, otherwise return NULL_TREE. */
378 static tree
379 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
381 optab op;
382 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
383 bool scalar_shift = true;
385 for (i = 1; i < nunits; i++)
387 if (shiftcnts[i] != shiftcnts[0])
388 scalar_shift = false;
391 if (scalar_shift && shiftcnts[0] == 0)
392 return op0;
394 if (scalar_shift)
396 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
397 if (op != unknown_optab
398 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
399 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
400 build_int_cst (NULL_TREE, shiftcnts[0]));
403 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
404 if (op != unknown_optab
405 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
407 tree *vec = XALLOCAVEC (tree, nunits);
408 for (i = 0; i < nunits; i++)
409 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
410 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
411 build_vector (type, vec));
414 return NULL_TREE;
417 /* Try to expand integer vector division by constant using
418 widening multiply, shifts and additions. */
419 static tree
420 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
421 tree op1, enum tree_code code)
423 bool use_pow2 = true;
424 bool has_vector_shift = true;
425 int mode = -1, this_mode;
426 int pre_shift = -1, post_shift;
427 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
428 int *shifts = XALLOCAVEC (int, nunits * 4);
429 int *pre_shifts = shifts + nunits;
430 int *post_shifts = pre_shifts + nunits;
431 int *shift_temps = post_shifts + nunits;
432 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
433 int prec = TYPE_PRECISION (TREE_TYPE (type));
434 int dummy_int;
435 unsigned int i;
436 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
437 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
438 tree *vec;
439 tree cur_op, mulcst, tem;
440 optab op;
442 if (prec > HOST_BITS_PER_WIDE_INT)
443 return NULL_TREE;
445 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
446 if (op == unknown_optab
447 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
448 has_vector_shift = false;
450 /* Analysis phase. Determine if all op1 elements are either power
451 of two and it is possible to expand it using shifts (or for remainder
452 using masking). Additionally compute the multiplicative constants
453 and pre and post shifts if the division is to be expanded using
454 widening or high part multiplication plus shifts. */
455 for (i = 0; i < nunits; i++)
457 tree cst = VECTOR_CST_ELT (op1, i);
458 unsigned HOST_WIDE_INT ml;
460 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
461 return NULL_TREE;
462 pre_shifts[i] = 0;
463 post_shifts[i] = 0;
464 mulc[i] = 0;
465 if (use_pow2
466 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
467 use_pow2 = false;
468 if (use_pow2)
470 shifts[i] = tree_log2 (cst);
471 if (shifts[i] != shifts[0]
472 && code == TRUNC_DIV_EXPR
473 && !has_vector_shift)
474 use_pow2 = false;
476 if (mode == -2)
477 continue;
478 if (sign_p == UNSIGNED)
480 unsigned HOST_WIDE_INT mh;
481 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
483 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
484 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
485 return NULL_TREE;
487 if (d <= 1)
489 mode = -2;
490 continue;
493 /* Find a suitable multiplier and right shift count
494 instead of multiplying with D. */
495 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
497 /* If the suggested multiplier is more than SIZE bits, we can
498 do better for even divisors, using an initial right shift. */
499 if ((mh != 0 && (d & 1) == 0)
500 || (!has_vector_shift && pre_shift != -1))
502 if (has_vector_shift)
503 pre_shift = floor_log2 (d & -d);
504 else if (pre_shift == -1)
506 unsigned int j;
507 for (j = 0; j < nunits; j++)
509 tree cst2 = VECTOR_CST_ELT (op1, j);
510 unsigned HOST_WIDE_INT d2;
511 int this_pre_shift;
513 if (!tree_fits_uhwi_p (cst2))
514 return NULL_TREE;
515 d2 = tree_to_uhwi (cst2) & mask;
516 if (d2 == 0)
517 return NULL_TREE;
518 this_pre_shift = floor_log2 (d2 & -d2);
519 if (pre_shift == -1 || this_pre_shift < pre_shift)
520 pre_shift = this_pre_shift;
522 if (i != 0 && pre_shift != 0)
524 /* Restart. */
525 i = -1U;
526 mode = -1;
527 continue;
530 if (pre_shift != 0)
532 if ((d >> pre_shift) <= 1)
534 mode = -2;
535 continue;
537 mh = choose_multiplier (d >> pre_shift, prec,
538 prec - pre_shift,
539 &ml, &post_shift, &dummy_int);
540 gcc_assert (!mh);
541 pre_shifts[i] = pre_shift;
544 if (!mh)
545 this_mode = 0;
546 else
547 this_mode = 1;
549 else
551 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
552 unsigned HOST_WIDE_INT abs_d;
554 if (d == -1)
555 return NULL_TREE;
557 /* Since d might be INT_MIN, we have to cast to
558 unsigned HOST_WIDE_INT before negating to avoid
559 undefined signed overflow. */
560 abs_d = (d >= 0
561 ? (unsigned HOST_WIDE_INT) d
562 : - (unsigned HOST_WIDE_INT) d);
564 /* n rem d = n rem -d */
565 if (code == TRUNC_MOD_EXPR && d < 0)
566 d = abs_d;
567 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
569 /* This case is not handled correctly below. */
570 mode = -2;
571 continue;
573 if (abs_d <= 1)
575 mode = -2;
576 continue;
579 choose_multiplier (abs_d, prec, prec - 1, &ml,
580 &post_shift, &dummy_int);
581 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
583 this_mode = 4 + (d < 0);
584 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
586 else
587 this_mode = 2 + (d < 0);
589 mulc[i] = ml;
590 post_shifts[i] = post_shift;
591 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
592 || post_shift >= prec
593 || pre_shifts[i] >= prec)
594 this_mode = -2;
596 if (i == 0)
597 mode = this_mode;
598 else if (mode != this_mode)
599 mode = -2;
602 vec = XALLOCAVEC (tree, nunits);
604 if (use_pow2)
606 tree addend = NULL_TREE;
607 if (sign_p == SIGNED)
609 tree uns_type;
611 /* Both division and remainder sequences need
612 op0 < 0 ? mask : 0 computed. It can be either computed as
613 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
614 if none of the shifts is 0, or as the conditional. */
615 for (i = 0; i < nunits; i++)
616 if (shifts[i] == 0)
617 break;
618 uns_type
619 = build_vector_type (build_nonstandard_integer_type (prec, 1),
620 nunits);
621 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
623 for (i = 0; i < nunits; i++)
624 shift_temps[i] = prec - 1;
625 cur_op = add_rshift (gsi, type, op0, shift_temps);
626 if (cur_op != NULL_TREE)
628 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
629 uns_type, cur_op);
630 for (i = 0; i < nunits; i++)
631 shift_temps[i] = prec - shifts[i];
632 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
633 if (cur_op != NULL_TREE)
634 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
635 type, cur_op);
638 if (addend == NULL_TREE
639 && expand_vec_cond_expr_p (type, type))
641 tree zero, cst, cond;
642 gimple stmt;
644 zero = build_zero_cst (type);
645 cond = build2 (LT_EXPR, type, op0, zero);
646 for (i = 0; i < nunits; i++)
647 vec[i] = build_int_cst (TREE_TYPE (type),
648 ((unsigned HOST_WIDE_INT) 1
649 << shifts[i]) - 1);
650 cst = build_vector (type, vec);
651 addend = make_ssa_name (type);
652 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
653 cst, zero);
654 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
657 if (code == TRUNC_DIV_EXPR)
659 if (sign_p == UNSIGNED)
661 /* q = op0 >> shift; */
662 cur_op = add_rshift (gsi, type, op0, shifts);
663 if (cur_op != NULL_TREE)
664 return cur_op;
666 else if (addend != NULL_TREE)
668 /* t1 = op0 + addend;
669 q = t1 >> shift; */
670 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
671 if (op != unknown_optab
672 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
674 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
675 cur_op = add_rshift (gsi, type, cur_op, shifts);
676 if (cur_op != NULL_TREE)
677 return cur_op;
681 else
683 tree mask;
684 for (i = 0; i < nunits; i++)
685 vec[i] = build_int_cst (TREE_TYPE (type),
686 ((unsigned HOST_WIDE_INT) 1
687 << shifts[i]) - 1);
688 mask = build_vector (type, vec);
689 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
690 if (op != unknown_optab
691 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
693 if (sign_p == UNSIGNED)
694 /* r = op0 & mask; */
695 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
696 else if (addend != NULL_TREE)
698 /* t1 = op0 + addend;
699 t2 = t1 & mask;
700 r = t2 - addend; */
701 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
702 if (op != unknown_optab
703 && optab_handler (op, TYPE_MODE (type))
704 != CODE_FOR_nothing)
706 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
707 addend);
708 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
709 cur_op, mask);
710 op = optab_for_tree_code (MINUS_EXPR, type,
711 optab_default);
712 if (op != unknown_optab
713 && optab_handler (op, TYPE_MODE (type))
714 != CODE_FOR_nothing)
715 return gimplify_build2 (gsi, MINUS_EXPR, type,
716 cur_op, addend);
723 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
724 return NULL_TREE;
726 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
727 return NULL_TREE;
729 cur_op = op0;
731 switch (mode)
733 case 0:
734 gcc_assert (sign_p == UNSIGNED);
735 /* t1 = oprnd0 >> pre_shift;
736 t2 = t1 h* ml;
737 q = t2 >> post_shift; */
738 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
739 if (cur_op == NULL_TREE)
740 return NULL_TREE;
741 break;
742 case 1:
743 gcc_assert (sign_p == UNSIGNED);
744 for (i = 0; i < nunits; i++)
746 shift_temps[i] = 1;
747 post_shifts[i]--;
749 break;
750 case 2:
751 case 3:
752 case 4:
753 case 5:
754 gcc_assert (sign_p == SIGNED);
755 for (i = 0; i < nunits; i++)
756 shift_temps[i] = prec - 1;
757 break;
758 default:
759 return NULL_TREE;
762 for (i = 0; i < nunits; i++)
763 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
764 mulcst = build_vector (type, vec);
766 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
768 switch (mode)
770 case 0:
771 /* t1 = oprnd0 >> pre_shift;
772 t2 = t1 h* ml;
773 q = t2 >> post_shift; */
774 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
775 break;
776 case 1:
777 /* t1 = oprnd0 h* ml;
778 t2 = oprnd0 - t1;
779 t3 = t2 >> 1;
780 t4 = t1 + t3;
781 q = t4 >> (post_shift - 1); */
782 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
783 if (op == unknown_optab
784 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
785 return NULL_TREE;
786 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
787 tem = add_rshift (gsi, type, tem, shift_temps);
788 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
789 if (op == unknown_optab
790 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
791 return NULL_TREE;
792 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
793 cur_op = add_rshift (gsi, type, tem, post_shifts);
794 if (cur_op == NULL_TREE)
795 return NULL_TREE;
796 break;
797 case 2:
798 case 3:
799 case 4:
800 case 5:
801 /* t1 = oprnd0 h* ml;
802 t2 = t1; [ iff (mode & 2) != 0 ]
803 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
804 t3 = t2 >> post_shift;
805 t4 = oprnd0 >> (prec - 1);
806 q = t3 - t4; [ iff (mode & 1) == 0 ]
807 q = t4 - t3; [ iff (mode & 1) != 0 ] */
808 if ((mode & 2) == 0)
810 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
811 if (op == unknown_optab
812 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
813 return NULL_TREE;
814 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
816 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
817 if (cur_op == NULL_TREE)
818 return NULL_TREE;
819 tem = add_rshift (gsi, type, op0, shift_temps);
820 if (tem == NULL_TREE)
821 return NULL_TREE;
822 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
823 if (op == unknown_optab
824 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
825 return NULL_TREE;
826 if ((mode & 1) == 0)
827 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
828 else
829 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
830 break;
831 default:
832 gcc_unreachable ();
835 if (code == TRUNC_DIV_EXPR)
836 return cur_op;
838 /* We divided. Now finish by:
839 t1 = q * oprnd1;
840 r = oprnd0 - t1; */
841 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
842 if (op == unknown_optab
843 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
844 return NULL_TREE;
845 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
846 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
847 if (op == unknown_optab
848 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
849 return NULL_TREE;
850 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
853 /* Expand a vector condition to scalars, by using many conditions
854 on the vector's elements. */
855 static void
856 expand_vector_condition (gimple_stmt_iterator *gsi)
858 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
859 tree type = gimple_expr_type (stmt);
860 tree a = gimple_assign_rhs1 (stmt);
861 tree a1 = a;
862 tree a2;
863 bool a_is_comparison = false;
864 tree b = gimple_assign_rhs2 (stmt);
865 tree c = gimple_assign_rhs3 (stmt);
866 vec<constructor_elt, va_gc> *v;
867 tree constr;
868 tree inner_type = TREE_TYPE (type);
869 tree cond_type = TREE_TYPE (TREE_TYPE (a));
870 tree comp_inner_type = cond_type;
871 tree width = TYPE_SIZE (inner_type);
872 tree index = bitsize_int (0);
873 int nunits = TYPE_VECTOR_SUBPARTS (type);
874 int i;
875 location_t loc = gimple_location (gsi_stmt (*gsi));
877 if (!is_gimple_val (a))
879 gcc_assert (COMPARISON_CLASS_P (a));
880 a_is_comparison = true;
881 a1 = TREE_OPERAND (a, 0);
882 a2 = TREE_OPERAND (a, 1);
883 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
886 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
887 return;
889 /* TODO: try and find a smaller vector type. */
891 warning_at (loc, OPT_Wvector_operation_performance,
892 "vector condition will be expanded piecewise");
894 vec_alloc (v, nunits);
895 for (i = 0; i < nunits;
896 i++, index = int_const_binop (PLUS_EXPR, index, width))
898 tree aa, result;
899 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
900 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
901 if (a_is_comparison)
903 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
904 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
905 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
907 else
908 aa = tree_vec_extract (gsi, cond_type, a, width, index);
909 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
910 constructor_elt ce = {NULL_TREE, result};
911 v->quick_push (ce);
914 constr = build_constructor (type, v);
915 gimple_assign_set_rhs_from_tree (gsi, constr);
916 update_stmt (gsi_stmt (*gsi));
919 static tree
920 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
921 gassign *assign, enum tree_code code)
923 machine_mode compute_mode = TYPE_MODE (compute_type);
925 /* If the compute mode is not a vector mode (hence we are not decomposing
926 a BLKmode vector to smaller, hardware-supported vectors), we may want
927 to expand the operations in parallel. */
928 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
929 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
930 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
931 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
932 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
933 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
934 switch (code)
936 case PLUS_EXPR:
937 case MINUS_EXPR:
938 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
939 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
940 gimple_assign_rhs1 (assign),
941 gimple_assign_rhs2 (assign), code);
942 break;
944 case NEGATE_EXPR:
945 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
946 return expand_vector_addition (gsi, do_unop, do_negate, type,
947 gimple_assign_rhs1 (assign),
948 NULL_TREE, code);
949 break;
951 case BIT_AND_EXPR:
952 case BIT_IOR_EXPR:
953 case BIT_XOR_EXPR:
954 return expand_vector_parallel (gsi, do_binop, type,
955 gimple_assign_rhs1 (assign),
956 gimple_assign_rhs2 (assign), code);
958 case BIT_NOT_EXPR:
959 return expand_vector_parallel (gsi, do_unop, type,
960 gimple_assign_rhs1 (assign),
961 NULL_TREE, code);
962 case EQ_EXPR:
963 case NE_EXPR:
964 case GT_EXPR:
965 case LT_EXPR:
966 case GE_EXPR:
967 case LE_EXPR:
968 case UNEQ_EXPR:
969 case UNGT_EXPR:
970 case UNLT_EXPR:
971 case UNGE_EXPR:
972 case UNLE_EXPR:
973 case LTGT_EXPR:
974 case ORDERED_EXPR:
975 case UNORDERED_EXPR:
977 tree rhs1 = gimple_assign_rhs1 (assign);
978 tree rhs2 = gimple_assign_rhs2 (assign);
980 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
983 case TRUNC_DIV_EXPR:
984 case TRUNC_MOD_EXPR:
986 tree rhs1 = gimple_assign_rhs1 (assign);
987 tree rhs2 = gimple_assign_rhs2 (assign);
988 tree ret;
990 if (!optimize
991 || !VECTOR_INTEGER_TYPE_P (type)
992 || TREE_CODE (rhs2) != VECTOR_CST
993 || !VECTOR_MODE_P (TYPE_MODE (type)))
994 break;
996 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
997 if (ret != NULL_TREE)
998 return ret;
999 break;
1002 default:
1003 break;
1006 if (TREE_CODE_CLASS (code) == tcc_unary)
1007 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1008 gimple_assign_rhs1 (assign),
1009 NULL_TREE, code);
1010 else
1011 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1012 gimple_assign_rhs1 (assign),
1013 gimple_assign_rhs2 (assign), code);
1016 /* Try to optimize
1017 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1018 style stmts into:
1019 _9 = { b_7, b_7, b_7, b_7 };
1020 a_5 = _9 + { 0, 3, 6, 9 };
1021 because vector splat operation is usually more efficient
1022 than piecewise initialization of the vector. */
1024 static void
1025 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1027 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1028 tree lhs = gimple_assign_lhs (stmt);
1029 tree rhs = gimple_assign_rhs1 (stmt);
1030 tree type = TREE_TYPE (rhs);
1031 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1032 bool all_same = true;
1033 constructor_elt *elt;
1034 tree *cst;
1035 gimple g;
1036 tree base = NULL_TREE;
1037 optab op;
1039 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1040 return;
1041 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1042 if (op == unknown_optab
1043 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1044 return;
1045 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1046 if (TREE_CODE (elt->value) != SSA_NAME
1047 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1048 return;
1049 else
1051 tree this_base = elt->value;
1052 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1053 all_same = false;
1054 for (j = 0; j < nelts + 1; j++)
1056 g = SSA_NAME_DEF_STMT (this_base);
1057 if (is_gimple_assign (g)
1058 && gimple_assign_rhs_code (g) == PLUS_EXPR
1059 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1060 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1061 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1062 this_base = gimple_assign_rhs1 (g);
1063 else
1064 break;
1066 if (i == 0)
1067 base = this_base;
1068 else if (this_base != base)
1069 return;
1071 if (all_same)
1072 return;
1073 cst = XALLOCAVEC (tree, nelts);
1074 for (i = 0; i < nelts; i++)
1076 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1077 cst[i] = build_zero_cst (TREE_TYPE (base));
1078 while (this_base != base)
1080 g = SSA_NAME_DEF_STMT (this_base);
1081 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1082 cst[i], gimple_assign_rhs2 (g));
1083 if (cst[i] == NULL_TREE
1084 || TREE_CODE (cst[i]) != INTEGER_CST
1085 || TREE_OVERFLOW (cst[i]))
1086 return;
1087 this_base = gimple_assign_rhs1 (g);
1090 for (i = 0; i < nelts; i++)
1091 CONSTRUCTOR_ELT (rhs, i)->value = base;
1092 g = gimple_build_assign (make_ssa_name (type), rhs);
1093 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1094 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1095 build_vector (type, cst));
1096 gsi_replace (gsi, g, false);
1099 /* Return a type for the widest vector mode whose components are of type
1100 TYPE, or NULL_TREE if none is found. */
1102 static tree
1103 type_for_widest_vector_mode (tree type, optab op)
1105 machine_mode inner_mode = TYPE_MODE (type);
1106 machine_mode best_mode = VOIDmode, mode;
1107 int best_nunits = 0;
1109 if (SCALAR_FLOAT_MODE_P (inner_mode))
1110 mode = MIN_MODE_VECTOR_FLOAT;
1111 else if (SCALAR_FRACT_MODE_P (inner_mode))
1112 mode = MIN_MODE_VECTOR_FRACT;
1113 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1114 mode = MIN_MODE_VECTOR_UFRACT;
1115 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1116 mode = MIN_MODE_VECTOR_ACCUM;
1117 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1118 mode = MIN_MODE_VECTOR_UACCUM;
1119 else
1120 mode = MIN_MODE_VECTOR_INT;
1122 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1123 if (GET_MODE_INNER (mode) == inner_mode
1124 && GET_MODE_NUNITS (mode) > best_nunits
1125 && optab_handler (op, mode) != CODE_FOR_nothing)
1126 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1128 if (best_mode == VOIDmode)
1129 return NULL_TREE;
1130 else
1131 return build_vector_type_for_mode (type, best_mode);
1135 /* Build a reference to the element of the vector VECT. Function
1136 returns either the element itself, either BIT_FIELD_REF, or an
1137 ARRAY_REF expression.
1139 GSI is required to insert temporary variables while building a
1140 refernece to the element of the vector VECT.
1142 PTMPVEC is a pointer to the temporary variable for caching
1143 purposes. In case when PTMPVEC is NULL new temporary variable
1144 will be created. */
1145 static tree
1146 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1148 tree vect_type, vect_elt_type;
1149 gimple asgn;
1150 tree tmpvec;
1151 tree arraytype;
1152 bool need_asgn = true;
1153 unsigned int elements;
1155 vect_type = TREE_TYPE (vect);
1156 vect_elt_type = TREE_TYPE (vect_type);
1157 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1159 if (TREE_CODE (idx) == INTEGER_CST)
1161 unsigned HOST_WIDE_INT index;
1163 /* Given that we're about to compute a binary modulus,
1164 we don't care about the high bits of the value. */
1165 index = TREE_INT_CST_LOW (idx);
1166 if (!tree_fits_uhwi_p (idx) || index >= elements)
1168 index &= elements - 1;
1169 idx = build_int_cst (TREE_TYPE (idx), index);
1172 /* When lowering a vector statement sequence do some easy
1173 simplification by looking through intermediate vector results. */
1174 if (TREE_CODE (vect) == SSA_NAME)
1176 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1177 if (is_gimple_assign (def_stmt)
1178 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1179 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1180 vect = gimple_assign_rhs1 (def_stmt);
1183 if (TREE_CODE (vect) == VECTOR_CST)
1184 return VECTOR_CST_ELT (vect, index);
1185 else if (TREE_CODE (vect) == CONSTRUCTOR
1186 && (CONSTRUCTOR_NELTS (vect) == 0
1187 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1188 != VECTOR_TYPE))
1190 if (index < CONSTRUCTOR_NELTS (vect))
1191 return CONSTRUCTOR_ELT (vect, index)->value;
1192 return build_zero_cst (vect_elt_type);
1194 else
1196 tree size = TYPE_SIZE (vect_elt_type);
1197 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1198 size);
1199 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1203 if (!ptmpvec)
1204 tmpvec = create_tmp_var (vect_type, "vectmp");
1205 else if (!*ptmpvec)
1206 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1207 else
1209 tmpvec = *ptmpvec;
1210 need_asgn = false;
1213 if (need_asgn)
1215 TREE_ADDRESSABLE (tmpvec) = 1;
1216 asgn = gimple_build_assign (tmpvec, vect);
1217 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1220 arraytype = build_array_type_nelts (vect_elt_type, elements);
1221 return build4 (ARRAY_REF, vect_elt_type,
1222 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1223 idx, NULL_TREE, NULL_TREE);
1226 /* Check if VEC_PERM_EXPR within the given setting is supported
1227 by hardware, or lower it piecewise.
1229 When VEC_PERM_EXPR has the same first and second operands:
1230 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1231 {v0[mask[0]], v0[mask[1]], ...}
1232 MASK and V0 must have the same number of elements.
1234 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1235 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1236 V0 and V1 must have the same type. MASK, V0, V1 must have the
1237 same number of arguments. */
1239 static void
1240 lower_vec_perm (gimple_stmt_iterator *gsi)
1242 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1243 tree mask = gimple_assign_rhs3 (stmt);
1244 tree vec0 = gimple_assign_rhs1 (stmt);
1245 tree vec1 = gimple_assign_rhs2 (stmt);
1246 tree vect_type = TREE_TYPE (vec0);
1247 tree mask_type = TREE_TYPE (mask);
1248 tree vect_elt_type = TREE_TYPE (vect_type);
1249 tree mask_elt_type = TREE_TYPE (mask_type);
1250 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1251 vec<constructor_elt, va_gc> *v;
1252 tree constr, t, si, i_val;
1253 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1254 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1255 location_t loc = gimple_location (gsi_stmt (*gsi));
1256 unsigned i;
1258 if (TREE_CODE (mask) == SSA_NAME)
1260 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1261 if (is_gimple_assign (def_stmt)
1262 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1263 mask = gimple_assign_rhs1 (def_stmt);
1266 if (TREE_CODE (mask) == VECTOR_CST)
1268 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1270 for (i = 0; i < elements; ++i)
1271 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1272 & (2 * elements - 1));
1274 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1276 gimple_assign_set_rhs3 (stmt, mask);
1277 update_stmt (stmt);
1278 return;
1281 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1282 return;
1284 warning_at (loc, OPT_Wvector_operation_performance,
1285 "vector shuffling operation will be expanded piecewise");
1287 vec_alloc (v, elements);
1288 for (i = 0; i < elements; i++)
1290 si = size_int (i);
1291 i_val = vector_element (gsi, mask, si, &masktmp);
1293 if (TREE_CODE (i_val) == INTEGER_CST)
1295 unsigned HOST_WIDE_INT index;
1297 index = TREE_INT_CST_LOW (i_val);
1298 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1299 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1301 if (two_operand_p && (index & elements) != 0)
1302 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1303 else
1304 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1306 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1307 true, GSI_SAME_STMT);
1309 else
1311 tree cond = NULL_TREE, v0_val;
1313 if (two_operand_p)
1315 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1316 build_int_cst (mask_elt_type, elements));
1317 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1318 true, GSI_SAME_STMT);
1321 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1322 build_int_cst (mask_elt_type, elements - 1));
1323 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1324 true, GSI_SAME_STMT);
1326 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1327 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1328 true, GSI_SAME_STMT);
1330 if (two_operand_p)
1332 tree v1_val;
1334 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1335 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1336 true, GSI_SAME_STMT);
1338 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1339 cond, build_zero_cst (mask_elt_type));
1340 cond = fold_build3 (COND_EXPR, vect_elt_type,
1341 cond, v0_val, v1_val);
1342 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1343 true, GSI_SAME_STMT);
1345 else
1346 t = v0_val;
1349 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1352 constr = build_constructor (vect_type, v);
1353 gimple_assign_set_rhs_from_tree (gsi, constr);
1354 update_stmt (gsi_stmt (*gsi));
1357 /* Return type in which CODE operation with optab OP can be
1358 computed. */
1360 static tree
1361 get_compute_type (enum tree_code code, optab op, tree type)
1363 /* For very wide vectors, try using a smaller vector mode. */
1364 tree compute_type = type;
1365 if (op
1366 && (!VECTOR_MODE_P (TYPE_MODE (type))
1367 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1369 tree vector_compute_type
1370 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1371 if (vector_compute_type != NULL_TREE
1372 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1373 < TYPE_VECTOR_SUBPARTS (compute_type))
1374 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1375 != CODE_FOR_nothing))
1376 compute_type = vector_compute_type;
1379 /* If we are breaking a BLKmode vector into smaller pieces,
1380 type_for_widest_vector_mode has already looked into the optab,
1381 so skip these checks. */
1382 if (compute_type == type)
1384 machine_mode compute_mode = TYPE_MODE (compute_type);
1385 if (VECTOR_MODE_P (compute_mode))
1387 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1388 return compute_type;
1389 if (code == MULT_HIGHPART_EXPR
1390 && can_mult_highpart_p (compute_mode,
1391 TYPE_UNSIGNED (compute_type)))
1392 return compute_type;
1394 /* There is no operation in hardware, so fall back to scalars. */
1395 compute_type = TREE_TYPE (type);
1398 return compute_type;
1401 /* Helper function of expand_vector_operations_1. Return number of
1402 vector elements for vector types or 1 for other types. */
1404 static inline int
1405 count_type_subparts (tree type)
1407 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1410 static tree
1411 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1412 tree bitpos, tree bitsize, enum tree_code code)
1414 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1415 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1416 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1417 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1418 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1419 return gimplify_build3 (gsi, code, inner_type, cond, a, b);
1422 /* Expand a vector COND_EXPR to scalars, piecewise. */
1423 static void
1424 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1426 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1427 tree type = gimple_expr_type (stmt);
1428 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1429 machine_mode compute_mode = TYPE_MODE (compute_type);
1430 gcc_assert (compute_mode != BLKmode);
1431 tree lhs = gimple_assign_lhs (stmt);
1432 tree rhs2 = gimple_assign_rhs2 (stmt);
1433 tree rhs3 = gimple_assign_rhs3 (stmt);
1434 tree new_rhs;
1436 /* If the compute mode is not a vector mode (hence we are not decomposing
1437 a BLKmode vector to smaller, hardware-supported vectors), we may want
1438 to expand the operations in parallel. */
1439 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1440 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1441 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1442 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1443 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1444 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1445 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1446 COND_EXPR);
1447 else
1448 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1449 rhs2, rhs3, COND_EXPR);
1450 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1451 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1452 new_rhs);
1454 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1455 way to do it is change expand_vector_operation and its callees to
1456 return a tree_code, RHS1 and RHS2 instead of a tree. */
1457 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1458 update_stmt (gsi_stmt (*gsi));
1461 /* Process one statement. If we identify a vector operation, expand it. */
1463 static void
1464 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1466 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1467 enum tree_code code;
1468 optab op = unknown_optab;
1469 enum gimple_rhs_class rhs_class;
1470 tree new_rhs;
1472 /* Only consider code == GIMPLE_ASSIGN. */
1473 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1474 if (!stmt)
1475 return;
1477 code = gimple_assign_rhs_code (stmt);
1478 rhs_class = get_gimple_rhs_class (code);
1479 lhs = gimple_assign_lhs (stmt);
1481 if (code == VEC_PERM_EXPR)
1483 lower_vec_perm (gsi);
1484 return;
1487 if (code == VEC_COND_EXPR)
1489 expand_vector_condition (gsi);
1490 return;
1493 if (code == COND_EXPR
1494 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1495 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1497 expand_vector_scalar_condition (gsi);
1498 return;
1501 if (code == CONSTRUCTOR
1502 && TREE_CODE (lhs) == SSA_NAME
1503 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1504 && !gimple_clobber_p (stmt)
1505 && optimize)
1507 optimize_vector_constructor (gsi);
1508 return;
1511 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1512 return;
1514 rhs1 = gimple_assign_rhs1 (stmt);
1515 type = gimple_expr_type (stmt);
1516 if (rhs_class == GIMPLE_BINARY_RHS)
1517 rhs2 = gimple_assign_rhs2 (stmt);
1519 if (TREE_CODE (type) != VECTOR_TYPE)
1520 return;
1522 if (CONVERT_EXPR_CODE_P (code)
1523 || code == FLOAT_EXPR
1524 || code == FIX_TRUNC_EXPR
1525 || code == VIEW_CONVERT_EXPR)
1526 return;
1528 /* The signedness is determined from input argument. */
1529 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1530 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1531 type = TREE_TYPE (rhs1);
1533 /* For widening/narrowing vector operations, the relevant type is of the
1534 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1535 calculated in the same way above. */
1536 if (code == WIDEN_SUM_EXPR
1537 || code == VEC_WIDEN_MULT_HI_EXPR
1538 || code == VEC_WIDEN_MULT_LO_EXPR
1539 || code == VEC_WIDEN_MULT_EVEN_EXPR
1540 || code == VEC_WIDEN_MULT_ODD_EXPR
1541 || code == VEC_UNPACK_HI_EXPR
1542 || code == VEC_UNPACK_LO_EXPR
1543 || code == VEC_PACK_TRUNC_EXPR
1544 || code == VEC_PACK_SAT_EXPR
1545 || code == VEC_PACK_FIX_TRUNC_EXPR
1546 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1547 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1548 type = TREE_TYPE (rhs1);
1550 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1551 scalar */
1552 if (code == LSHIFT_EXPR
1553 || code == RSHIFT_EXPR
1554 || code == LROTATE_EXPR
1555 || code == RROTATE_EXPR)
1557 optab opv;
1559 /* Check whether we have vector <op> {x,x,x,x} where x
1560 could be a scalar variable or a constant. Transform
1561 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1562 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1564 tree first;
1565 gimple def_stmt;
1567 if ((TREE_CODE (rhs2) == VECTOR_CST
1568 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1569 || (TREE_CODE (rhs2) == SSA_NAME
1570 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1571 && gimple_assign_single_p (def_stmt)
1572 && (first = uniform_vector_p
1573 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1575 gimple_assign_set_rhs2 (stmt, first);
1576 update_stmt (stmt);
1577 rhs2 = first;
1581 opv = optab_for_tree_code (code, type, optab_vector);
1582 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1583 op = opv;
1584 else
1586 op = optab_for_tree_code (code, type, optab_scalar);
1588 compute_type = get_compute_type (code, op, type);
1589 if (compute_type == type)
1590 return;
1591 /* The rtl expander will expand vector/scalar as vector/vector
1592 if necessary. Pick one with wider vector type. */
1593 tree compute_vtype = get_compute_type (code, opv, type);
1594 if (count_type_subparts (compute_vtype)
1595 > count_type_subparts (compute_type))
1597 compute_type = compute_vtype;
1598 op = opv;
1602 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1604 if (compute_type == NULL_TREE)
1605 compute_type = get_compute_type (code, op, type);
1606 if (compute_type == type)
1607 return;
1608 /* Before splitting vector rotates into scalar rotates,
1609 see if we can't use vector shifts and BIT_IOR_EXPR
1610 instead. For vector by vector rotates we'd also
1611 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1612 for now, fold doesn't seem to create such rotates anyway. */
1613 if (compute_type == TREE_TYPE (type)
1614 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1616 optab oplv = vashl_optab, opl = ashl_optab;
1617 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1618 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1619 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1620 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1621 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1622 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1623 /* The rtl expander will expand vector/scalar as vector/vector
1624 if necessary. Pick one with wider vector type. */
1625 if (count_type_subparts (compute_lvtype)
1626 > count_type_subparts (compute_ltype))
1628 compute_ltype = compute_lvtype;
1629 opl = oplv;
1631 if (count_type_subparts (compute_rvtype)
1632 > count_type_subparts (compute_rtype))
1634 compute_rtype = compute_rvtype;
1635 opr = oprv;
1637 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1638 BIT_IOR_EXPR. */
1639 compute_type = compute_ltype;
1640 if (count_type_subparts (compute_type)
1641 > count_type_subparts (compute_rtype))
1642 compute_type = compute_rtype;
1643 if (count_type_subparts (compute_type)
1644 > count_type_subparts (compute_otype))
1645 compute_type = compute_otype;
1646 /* Verify all 3 operations can be performed in that type. */
1647 if (compute_type != TREE_TYPE (type))
1649 if (optab_handler (opl, TYPE_MODE (compute_type))
1650 == CODE_FOR_nothing
1651 || optab_handler (opr, TYPE_MODE (compute_type))
1652 == CODE_FOR_nothing
1653 || optab_handler (opo, TYPE_MODE (compute_type))
1654 == CODE_FOR_nothing)
1655 compute_type = TREE_TYPE (type);
1660 else
1661 op = optab_for_tree_code (code, type, optab_default);
1663 /* Optabs will try converting a negation into a subtraction, so
1664 look for it as well. TODO: negation of floating-point vectors
1665 might be turned into an exclusive OR toggling the sign bit. */
1666 if (op == unknown_optab
1667 && code == NEGATE_EXPR
1668 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1669 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1671 if (compute_type == NULL_TREE)
1672 compute_type = get_compute_type (code, op, type);
1673 if (compute_type == type)
1674 return;
1676 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1678 /* Leave expression untouched for later expansion. */
1679 if (new_rhs == NULL_TREE)
1680 return;
1682 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1683 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1684 new_rhs);
1686 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1687 way to do it is change expand_vector_operation and its callees to
1688 return a tree_code, RHS1 and RHS2 instead of a tree. */
1689 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1690 update_stmt (gsi_stmt (*gsi));
1693 /* Use this to lower vector operations introduced by the vectorizer,
1694 if it may need the bit-twiddling tricks implemented in this file. */
1696 static unsigned int
1697 expand_vector_operations (void)
1699 gimple_stmt_iterator gsi;
1700 basic_block bb;
1701 bool cfg_changed = false;
1703 FOR_EACH_BB_FN (bb, cfun)
1705 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1707 expand_vector_operations_1 (&gsi);
1708 /* ??? If we do not cleanup EH then we will ICE in
1709 verification. But in reality we have created wrong-code
1710 as we did not properly transition EH info and edges to
1711 the piecewise computations. */
1712 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1713 && gimple_purge_dead_eh_edges (bb))
1714 cfg_changed = true;
1718 return cfg_changed ? TODO_cleanup_cfg : 0;
1721 namespace {
1723 const pass_data pass_data_lower_vector =
1725 GIMPLE_PASS, /* type */
1726 "veclower", /* name */
1727 OPTGROUP_VEC, /* optinfo_flags */
1728 TV_NONE, /* tv_id */
1729 PROP_cfg, /* properties_required */
1730 PROP_gimple_lvec, /* properties_provided */
1731 0, /* properties_destroyed */
1732 0, /* todo_flags_start */
1733 TODO_update_ssa, /* todo_flags_finish */
1736 class pass_lower_vector : public gimple_opt_pass
1738 public:
1739 pass_lower_vector (gcc::context *ctxt)
1740 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1743 /* opt_pass methods: */
1744 virtual bool gate (function *fun)
1746 return !(fun->curr_properties & PROP_gimple_lvec);
1749 virtual unsigned int execute (function *)
1751 return expand_vector_operations ();
1754 }; // class pass_lower_vector
1756 } // anon namespace
1758 gimple_opt_pass *
1759 make_pass_lower_vector (gcc::context *ctxt)
1761 return new pass_lower_vector (ctxt);
1764 namespace {
1766 const pass_data pass_data_lower_vector_ssa =
1768 GIMPLE_PASS, /* type */
1769 "veclower2", /* name */
1770 OPTGROUP_VEC, /* optinfo_flags */
1771 TV_NONE, /* tv_id */
1772 PROP_cfg, /* properties_required */
1773 PROP_gimple_lvec, /* properties_provided */
1774 0, /* properties_destroyed */
1775 0, /* todo_flags_start */
1776 ( TODO_update_ssa
1777 | TODO_cleanup_cfg ), /* todo_flags_finish */
1780 class pass_lower_vector_ssa : public gimple_opt_pass
1782 public:
1783 pass_lower_vector_ssa (gcc::context *ctxt)
1784 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1787 /* opt_pass methods: */
1788 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1789 virtual unsigned int execute (function *)
1791 return expand_vector_operations ();
1794 }; // class pass_lower_vector_ssa
1796 } // anon namespace
1798 gimple_opt_pass *
1799 make_pass_lower_vector_ssa (gcc::context *ctxt)
1801 return new pass_lower_vector_ssa (ctxt);
1804 #include "gt-tree-vect-generic.h"