2014-12-19 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / gcc / tree-vect-generic.c
blobde09ff0b1e79bcd283105d9265ffe33c8599da99
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "stor-layout.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "predict.h"
28 #include "vec.h"
29 #include "hashtab.h"
30 #include "hash-set.h"
31 #include "machmode.h"
32 #include "hard-reg-set.h"
33 #include "input.h"
34 #include "function.h"
35 #include "dominance.h"
36 #include "cfg.h"
37 #include "basic-block.h"
38 #include "tree-ssa-alias.h"
39 #include "internal-fn.h"
40 #include "tree-eh.h"
41 #include "gimple-expr.h"
42 #include "is-a.h"
43 #include "gimple.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-ssa.h"
47 #include "tree-cfg.h"
48 #include "stringpool.h"
49 #include "tree-ssanames.h"
50 #include "tree-iterator.h"
51 #include "tree-pass.h"
52 #include "flags.h"
53 #include "diagnostic.h"
54 #include "target.h"
56 /* Need to include rtl.h, expr.h, etc. for optabs. */
57 #include "expr.h"
58 #include "insn-codes.h"
59 #include "optabs.h"
62 static void expand_vector_operations_1 (gimple_stmt_iterator *);
65 /* Build a constant of type TYPE, made of VALUE's bits replicated
66 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
67 static tree
68 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
70 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
71 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
72 / HOST_BITS_PER_WIDE_INT;
73 unsigned HOST_WIDE_INT low, mask;
74 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
75 int i;
77 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
79 if (width == HOST_BITS_PER_WIDE_INT)
80 low = value;
81 else
83 mask = ((HOST_WIDE_INT)1 << width) - 1;
84 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
87 for (i = 0; i < n; i++)
88 a[i] = low;
90 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
91 return wide_int_to_tree
92 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
95 static GTY(()) tree vector_inner_type;
96 static GTY(()) tree vector_last_type;
97 static GTY(()) int vector_last_nunits;
99 /* Return a suitable vector types made of SUBPARTS units each of mode
100 "word_mode" (the global variable). */
101 static tree
102 build_word_mode_vector_type (int nunits)
104 if (!vector_inner_type)
105 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
106 else if (vector_last_nunits == nunits)
108 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
109 return vector_last_type;
112 /* We build a new type, but we canonicalize it nevertheless,
113 because it still saves some memory. */
114 vector_last_nunits = nunits;
115 vector_last_type = type_hash_canon (nunits,
116 build_vector_type (vector_inner_type,
117 nunits));
118 return vector_last_type;
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 tree, tree, tree, tree, tree, enum tree_code);
124 static inline tree
125 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
126 tree t, tree bitsize, tree bitpos)
128 if (bitpos)
129 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
130 else
131 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
134 static tree
135 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
136 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
137 enum tree_code code)
139 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
140 return gimplify_build1 (gsi, code, inner_type, a);
143 static tree
144 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
145 tree bitpos, tree bitsize, enum tree_code code)
147 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
148 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
149 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
150 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
151 return gimplify_build2 (gsi, code, inner_type, a, b);
154 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
156 INNER_TYPE is the type of A and B elements
158 returned expression is of signed integer type with the
159 size equal to the size of INNER_TYPE. */
160 static tree
161 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
162 tree bitpos, tree bitsize, enum tree_code code)
164 tree comp_type;
166 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
167 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
169 comp_type = build_nonstandard_integer_type
170 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
172 return gimplify_build3 (gsi, COND_EXPR, comp_type,
173 fold_build2 (code, boolean_type_node, a, b),
174 build_int_cst (comp_type, -1),
175 build_int_cst (comp_type, 0));
178 /* Expand vector addition to scalars. This does bit twiddling
179 in order to increase parallelism:
181 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
182 (a ^ b) & 0x80808080
184 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
185 (a ^ ~b) & 0x80808080
187 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
189 This optimization should be done only if 4 vector items or more
190 fit into a word. */
191 static tree
192 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
193 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
194 enum tree_code code)
196 tree inner_type = TREE_TYPE (TREE_TYPE (a));
197 unsigned HOST_WIDE_INT max;
198 tree low_bits, high_bits, a_low, b_low, result_low, signs;
200 max = GET_MODE_MASK (TYPE_MODE (inner_type));
201 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
202 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
204 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
205 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
207 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
208 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
209 if (code == PLUS_EXPR)
210 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
211 else
213 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
214 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
217 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
218 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
219 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
222 static tree
223 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
224 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
225 tree bitsize ATTRIBUTE_UNUSED,
226 enum tree_code code ATTRIBUTE_UNUSED)
228 tree inner_type = TREE_TYPE (TREE_TYPE (b));
229 HOST_WIDE_INT max;
230 tree low_bits, high_bits, b_low, result_low, signs;
232 max = GET_MODE_MASK (TYPE_MODE (inner_type));
233 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
234 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
236 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
238 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
239 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
240 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
241 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
242 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
245 /* Expand a vector operation to scalars, by using many operations
246 whose type is the vector type's inner type. */
247 static tree
248 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
249 tree type, tree inner_type,
250 tree a, tree b, enum tree_code code)
252 vec<constructor_elt, va_gc> *v;
253 tree part_width = TYPE_SIZE (inner_type);
254 tree index = bitsize_int (0);
255 int nunits = TYPE_VECTOR_SUBPARTS (type);
256 int delta = tree_to_uhwi (part_width)
257 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
258 int i;
259 location_t loc = gimple_location (gsi_stmt (*gsi));
261 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
262 warning_at (loc, OPT_Wvector_operation_performance,
263 "vector operation will be expanded piecewise");
264 else
265 warning_at (loc, OPT_Wvector_operation_performance,
266 "vector operation will be expanded in parallel");
268 vec_alloc (v, (nunits + delta - 1) / delta);
269 for (i = 0; i < nunits;
270 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
272 tree result = f (gsi, inner_type, a, b, index, part_width, code);
273 constructor_elt ce = {NULL_TREE, result};
274 v->quick_push (ce);
277 return build_constructor (type, v);
280 /* Expand a vector operation to scalars with the freedom to use
281 a scalar integer type, or to use a different size for the items
282 in the vector type. */
283 static tree
284 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
285 tree a, tree b,
286 enum tree_code code)
288 tree result, compute_type;
289 machine_mode mode;
290 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
291 location_t loc = gimple_location (gsi_stmt (*gsi));
293 /* We have three strategies. If the type is already correct, just do
294 the operation an element at a time. Else, if the vector is wider than
295 one word, do it a word at a time; finally, if the vector is smaller
296 than one word, do it as a scalar. */
297 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
298 return expand_vector_piecewise (gsi, f,
299 type, TREE_TYPE (type),
300 a, b, code);
301 else if (n_words > 1)
303 tree word_type = build_word_mode_vector_type (n_words);
304 result = expand_vector_piecewise (gsi, f,
305 word_type, TREE_TYPE (word_type),
306 a, b, code);
307 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
308 GSI_SAME_STMT);
310 else
312 /* Use a single scalar operation with a mode no wider than word_mode. */
313 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
314 compute_type = lang_hooks.types.type_for_mode (mode, 1);
315 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
316 warning_at (loc, OPT_Wvector_operation_performance,
317 "vector operation will be expanded with a "
318 "single scalar operation");
321 return result;
324 /* Expand a vector operation to scalars; for integer types we can use
325 special bit twiddling tricks to do the sums a word at a time, using
326 function F_PARALLEL instead of F. These tricks are done only if
327 they can process at least four items, that is, only if the vector
328 holds at least four items and if a word can hold four items. */
329 static tree
330 expand_vector_addition (gimple_stmt_iterator *gsi,
331 elem_op_func f, elem_op_func f_parallel,
332 tree type, tree a, tree b, enum tree_code code)
334 int parts_per_word = UNITS_PER_WORD
335 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
337 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
338 && parts_per_word >= 4
339 && TYPE_VECTOR_SUBPARTS (type) >= 4)
340 return expand_vector_parallel (gsi, f_parallel,
341 type, a, b, code);
342 else
343 return expand_vector_piecewise (gsi, f,
344 type, TREE_TYPE (type),
345 a, b, code);
348 /* Try to expand vector comparison expression OP0 CODE OP1 by
349 querying optab if the following expression:
350 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
351 can be expanded. */
352 static tree
353 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
354 tree op1, enum tree_code code)
356 tree t;
357 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
358 t = expand_vector_piecewise (gsi, do_compare, type,
359 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
360 else
361 t = NULL_TREE;
363 return t;
366 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
367 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
368 the result if successful, otherwise return NULL_TREE. */
369 static tree
370 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
372 optab op;
373 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
374 bool scalar_shift = true;
376 for (i = 1; i < nunits; i++)
378 if (shiftcnts[i] != shiftcnts[0])
379 scalar_shift = false;
382 if (scalar_shift && shiftcnts[0] == 0)
383 return op0;
385 if (scalar_shift)
387 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
388 if (op != unknown_optab
389 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
390 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
391 build_int_cst (NULL_TREE, shiftcnts[0]));
394 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
395 if (op != unknown_optab
396 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
398 tree *vec = XALLOCAVEC (tree, nunits);
399 for (i = 0; i < nunits; i++)
400 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
401 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
402 build_vector (type, vec));
405 return NULL_TREE;
408 /* Try to expand integer vector division by constant using
409 widening multiply, shifts and additions. */
410 static tree
411 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
412 tree op1, enum tree_code code)
414 bool use_pow2 = true;
415 bool has_vector_shift = true;
416 int mode = -1, this_mode;
417 int pre_shift = -1, post_shift;
418 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
419 int *shifts = XALLOCAVEC (int, nunits * 4);
420 int *pre_shifts = shifts + nunits;
421 int *post_shifts = pre_shifts + nunits;
422 int *shift_temps = post_shifts + nunits;
423 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
424 int prec = TYPE_PRECISION (TREE_TYPE (type));
425 int dummy_int;
426 unsigned int i;
427 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
428 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
429 tree *vec;
430 tree cur_op, mulcst, tem;
431 optab op;
433 if (prec > HOST_BITS_PER_WIDE_INT)
434 return NULL_TREE;
436 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
437 if (op == unknown_optab
438 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
439 has_vector_shift = false;
441 /* Analysis phase. Determine if all op1 elements are either power
442 of two and it is possible to expand it using shifts (or for remainder
443 using masking). Additionally compute the multiplicative constants
444 and pre and post shifts if the division is to be expanded using
445 widening or high part multiplication plus shifts. */
446 for (i = 0; i < nunits; i++)
448 tree cst = VECTOR_CST_ELT (op1, i);
449 unsigned HOST_WIDE_INT ml;
451 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
452 return NULL_TREE;
453 pre_shifts[i] = 0;
454 post_shifts[i] = 0;
455 mulc[i] = 0;
456 if (use_pow2
457 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
458 use_pow2 = false;
459 if (use_pow2)
461 shifts[i] = tree_log2 (cst);
462 if (shifts[i] != shifts[0]
463 && code == TRUNC_DIV_EXPR
464 && !has_vector_shift)
465 use_pow2 = false;
467 if (mode == -2)
468 continue;
469 if (sign_p == UNSIGNED)
471 unsigned HOST_WIDE_INT mh;
472 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
474 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
475 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
476 return NULL_TREE;
478 if (d <= 1)
480 mode = -2;
481 continue;
484 /* Find a suitable multiplier and right shift count
485 instead of multiplying with D. */
486 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
488 /* If the suggested multiplier is more than SIZE bits, we can
489 do better for even divisors, using an initial right shift. */
490 if ((mh != 0 && (d & 1) == 0)
491 || (!has_vector_shift && pre_shift != -1))
493 if (has_vector_shift)
494 pre_shift = floor_log2 (d & -d);
495 else if (pre_shift == -1)
497 unsigned int j;
498 for (j = 0; j < nunits; j++)
500 tree cst2 = VECTOR_CST_ELT (op1, j);
501 unsigned HOST_WIDE_INT d2;
502 int this_pre_shift;
504 if (!tree_fits_uhwi_p (cst2))
505 return NULL_TREE;
506 d2 = tree_to_uhwi (cst2) & mask;
507 if (d2 == 0)
508 return NULL_TREE;
509 this_pre_shift = floor_log2 (d2 & -d2);
510 if (pre_shift == -1 || this_pre_shift < pre_shift)
511 pre_shift = this_pre_shift;
513 if (i != 0 && pre_shift != 0)
515 /* Restart. */
516 i = -1U;
517 mode = -1;
518 continue;
521 if (pre_shift != 0)
523 if ((d >> pre_shift) <= 1)
525 mode = -2;
526 continue;
528 mh = choose_multiplier (d >> pre_shift, prec,
529 prec - pre_shift,
530 &ml, &post_shift, &dummy_int);
531 gcc_assert (!mh);
532 pre_shifts[i] = pre_shift;
535 if (!mh)
536 this_mode = 0;
537 else
538 this_mode = 1;
540 else
542 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
543 unsigned HOST_WIDE_INT abs_d;
545 if (d == -1)
546 return NULL_TREE;
548 /* Since d might be INT_MIN, we have to cast to
549 unsigned HOST_WIDE_INT before negating to avoid
550 undefined signed overflow. */
551 abs_d = (d >= 0
552 ? (unsigned HOST_WIDE_INT) d
553 : - (unsigned HOST_WIDE_INT) d);
555 /* n rem d = n rem -d */
556 if (code == TRUNC_MOD_EXPR && d < 0)
557 d = abs_d;
558 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
560 /* This case is not handled correctly below. */
561 mode = -2;
562 continue;
564 if (abs_d <= 1)
566 mode = -2;
567 continue;
570 choose_multiplier (abs_d, prec, prec - 1, &ml,
571 &post_shift, &dummy_int);
572 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
574 this_mode = 4 + (d < 0);
575 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
577 else
578 this_mode = 2 + (d < 0);
580 mulc[i] = ml;
581 post_shifts[i] = post_shift;
582 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
583 || post_shift >= prec
584 || pre_shifts[i] >= prec)
585 this_mode = -2;
587 if (i == 0)
588 mode = this_mode;
589 else if (mode != this_mode)
590 mode = -2;
593 vec = XALLOCAVEC (tree, nunits);
595 if (use_pow2)
597 tree addend = NULL_TREE;
598 if (sign_p == SIGNED)
600 tree uns_type;
602 /* Both division and remainder sequences need
603 op0 < 0 ? mask : 0 computed. It can be either computed as
604 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
605 if none of the shifts is 0, or as the conditional. */
606 for (i = 0; i < nunits; i++)
607 if (shifts[i] == 0)
608 break;
609 uns_type
610 = build_vector_type (build_nonstandard_integer_type (prec, 1),
611 nunits);
612 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
614 for (i = 0; i < nunits; i++)
615 shift_temps[i] = prec - 1;
616 cur_op = add_rshift (gsi, type, op0, shift_temps);
617 if (cur_op != NULL_TREE)
619 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
620 uns_type, cur_op);
621 for (i = 0; i < nunits; i++)
622 shift_temps[i] = prec - shifts[i];
623 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
624 if (cur_op != NULL_TREE)
625 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
626 type, cur_op);
629 if (addend == NULL_TREE
630 && expand_vec_cond_expr_p (type, type))
632 tree zero, cst, cond;
633 gimple stmt;
635 zero = build_zero_cst (type);
636 cond = build2 (LT_EXPR, type, op0, zero);
637 for (i = 0; i < nunits; i++)
638 vec[i] = build_int_cst (TREE_TYPE (type),
639 ((unsigned HOST_WIDE_INT) 1
640 << shifts[i]) - 1);
641 cst = build_vector (type, vec);
642 addend = make_ssa_name (type);
643 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
644 cst, zero);
645 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
648 if (code == TRUNC_DIV_EXPR)
650 if (sign_p == UNSIGNED)
652 /* q = op0 >> shift; */
653 cur_op = add_rshift (gsi, type, op0, shifts);
654 if (cur_op != NULL_TREE)
655 return cur_op;
657 else if (addend != NULL_TREE)
659 /* t1 = op0 + addend;
660 q = t1 >> shift; */
661 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
662 if (op != unknown_optab
663 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
665 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
666 cur_op = add_rshift (gsi, type, cur_op, shifts);
667 if (cur_op != NULL_TREE)
668 return cur_op;
672 else
674 tree mask;
675 for (i = 0; i < nunits; i++)
676 vec[i] = build_int_cst (TREE_TYPE (type),
677 ((unsigned HOST_WIDE_INT) 1
678 << shifts[i]) - 1);
679 mask = build_vector (type, vec);
680 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
681 if (op != unknown_optab
682 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
684 if (sign_p == UNSIGNED)
685 /* r = op0 & mask; */
686 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
687 else if (addend != NULL_TREE)
689 /* t1 = op0 + addend;
690 t2 = t1 & mask;
691 r = t2 - addend; */
692 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
693 if (op != unknown_optab
694 && optab_handler (op, TYPE_MODE (type))
695 != CODE_FOR_nothing)
697 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
698 addend);
699 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
700 cur_op, mask);
701 op = optab_for_tree_code (MINUS_EXPR, type,
702 optab_default);
703 if (op != unknown_optab
704 && optab_handler (op, TYPE_MODE (type))
705 != CODE_FOR_nothing)
706 return gimplify_build2 (gsi, MINUS_EXPR, type,
707 cur_op, addend);
714 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
715 return NULL_TREE;
717 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
718 return NULL_TREE;
720 cur_op = op0;
722 switch (mode)
724 case 0:
725 gcc_assert (sign_p == UNSIGNED);
726 /* t1 = oprnd0 >> pre_shift;
727 t2 = t1 h* ml;
728 q = t2 >> post_shift; */
729 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
730 if (cur_op == NULL_TREE)
731 return NULL_TREE;
732 break;
733 case 1:
734 gcc_assert (sign_p == UNSIGNED);
735 for (i = 0; i < nunits; i++)
737 shift_temps[i] = 1;
738 post_shifts[i]--;
740 break;
741 case 2:
742 case 3:
743 case 4:
744 case 5:
745 gcc_assert (sign_p == SIGNED);
746 for (i = 0; i < nunits; i++)
747 shift_temps[i] = prec - 1;
748 break;
749 default:
750 return NULL_TREE;
753 for (i = 0; i < nunits; i++)
754 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
755 mulcst = build_vector (type, vec);
757 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
759 switch (mode)
761 case 0:
762 /* t1 = oprnd0 >> pre_shift;
763 t2 = t1 h* ml;
764 q = t2 >> post_shift; */
765 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
766 break;
767 case 1:
768 /* t1 = oprnd0 h* ml;
769 t2 = oprnd0 - t1;
770 t3 = t2 >> 1;
771 t4 = t1 + t3;
772 q = t4 >> (post_shift - 1); */
773 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
774 if (op == unknown_optab
775 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
776 return NULL_TREE;
777 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
778 tem = add_rshift (gsi, type, tem, shift_temps);
779 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
780 if (op == unknown_optab
781 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
782 return NULL_TREE;
783 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
784 cur_op = add_rshift (gsi, type, tem, post_shifts);
785 if (cur_op == NULL_TREE)
786 return NULL_TREE;
787 break;
788 case 2:
789 case 3:
790 case 4:
791 case 5:
792 /* t1 = oprnd0 h* ml;
793 t2 = t1; [ iff (mode & 2) != 0 ]
794 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
795 t3 = t2 >> post_shift;
796 t4 = oprnd0 >> (prec - 1);
797 q = t3 - t4; [ iff (mode & 1) == 0 ]
798 q = t4 - t3; [ iff (mode & 1) != 0 ] */
799 if ((mode & 2) == 0)
801 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
802 if (op == unknown_optab
803 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
804 return NULL_TREE;
805 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
807 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
808 if (cur_op == NULL_TREE)
809 return NULL_TREE;
810 tem = add_rshift (gsi, type, op0, shift_temps);
811 if (tem == NULL_TREE)
812 return NULL_TREE;
813 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
814 if (op == unknown_optab
815 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
816 return NULL_TREE;
817 if ((mode & 1) == 0)
818 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
819 else
820 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
821 break;
822 default:
823 gcc_unreachable ();
826 if (code == TRUNC_DIV_EXPR)
827 return cur_op;
829 /* We divided. Now finish by:
830 t1 = q * oprnd1;
831 r = oprnd0 - t1; */
832 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
833 if (op == unknown_optab
834 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
835 return NULL_TREE;
836 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
837 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
838 if (op == unknown_optab
839 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
840 return NULL_TREE;
841 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
844 /* Expand a vector condition to scalars, by using many conditions
845 on the vector's elements. */
846 static void
847 expand_vector_condition (gimple_stmt_iterator *gsi)
849 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
850 tree type = gimple_expr_type (stmt);
851 tree a = gimple_assign_rhs1 (stmt);
852 tree a1 = a;
853 tree a2;
854 bool a_is_comparison = false;
855 tree b = gimple_assign_rhs2 (stmt);
856 tree c = gimple_assign_rhs3 (stmt);
857 vec<constructor_elt, va_gc> *v;
858 tree constr;
859 tree inner_type = TREE_TYPE (type);
860 tree cond_type = TREE_TYPE (TREE_TYPE (a));
861 tree comp_inner_type = cond_type;
862 tree width = TYPE_SIZE (inner_type);
863 tree index = bitsize_int (0);
864 int nunits = TYPE_VECTOR_SUBPARTS (type);
865 int i;
866 location_t loc = gimple_location (gsi_stmt (*gsi));
868 if (!is_gimple_val (a))
870 gcc_assert (COMPARISON_CLASS_P (a));
871 a_is_comparison = true;
872 a1 = TREE_OPERAND (a, 0);
873 a2 = TREE_OPERAND (a, 1);
874 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
877 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
878 return;
880 /* TODO: try and find a smaller vector type. */
882 warning_at (loc, OPT_Wvector_operation_performance,
883 "vector condition will be expanded piecewise");
885 vec_alloc (v, nunits);
886 for (i = 0; i < nunits;
887 i++, index = int_const_binop (PLUS_EXPR, index, width))
889 tree aa, result;
890 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
891 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
892 if (a_is_comparison)
894 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
895 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
896 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
898 else
899 aa = tree_vec_extract (gsi, cond_type, a, width, index);
900 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
901 constructor_elt ce = {NULL_TREE, result};
902 v->quick_push (ce);
905 constr = build_constructor (type, v);
906 gimple_assign_set_rhs_from_tree (gsi, constr);
907 update_stmt (gsi_stmt (*gsi));
910 static tree
911 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
912 gassign *assign, enum tree_code code)
914 machine_mode compute_mode = TYPE_MODE (compute_type);
916 /* If the compute mode is not a vector mode (hence we are not decomposing
917 a BLKmode vector to smaller, hardware-supported vectors), we may want
918 to expand the operations in parallel. */
919 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
920 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
921 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
922 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
923 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
924 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
925 switch (code)
927 case PLUS_EXPR:
928 case MINUS_EXPR:
929 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
930 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
931 gimple_assign_rhs1 (assign),
932 gimple_assign_rhs2 (assign), code);
933 break;
935 case NEGATE_EXPR:
936 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
937 return expand_vector_addition (gsi, do_unop, do_negate, type,
938 gimple_assign_rhs1 (assign),
939 NULL_TREE, code);
940 break;
942 case BIT_AND_EXPR:
943 case BIT_IOR_EXPR:
944 case BIT_XOR_EXPR:
945 return expand_vector_parallel (gsi, do_binop, type,
946 gimple_assign_rhs1 (assign),
947 gimple_assign_rhs2 (assign), code);
949 case BIT_NOT_EXPR:
950 return expand_vector_parallel (gsi, do_unop, type,
951 gimple_assign_rhs1 (assign),
952 NULL_TREE, code);
953 case EQ_EXPR:
954 case NE_EXPR:
955 case GT_EXPR:
956 case LT_EXPR:
957 case GE_EXPR:
958 case LE_EXPR:
959 case UNEQ_EXPR:
960 case UNGT_EXPR:
961 case UNLT_EXPR:
962 case UNGE_EXPR:
963 case UNLE_EXPR:
964 case LTGT_EXPR:
965 case ORDERED_EXPR:
966 case UNORDERED_EXPR:
968 tree rhs1 = gimple_assign_rhs1 (assign);
969 tree rhs2 = gimple_assign_rhs2 (assign);
971 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
974 case TRUNC_DIV_EXPR:
975 case TRUNC_MOD_EXPR:
977 tree rhs1 = gimple_assign_rhs1 (assign);
978 tree rhs2 = gimple_assign_rhs2 (assign);
979 tree ret;
981 if (!optimize
982 || !VECTOR_INTEGER_TYPE_P (type)
983 || TREE_CODE (rhs2) != VECTOR_CST
984 || !VECTOR_MODE_P (TYPE_MODE (type)))
985 break;
987 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
988 if (ret != NULL_TREE)
989 return ret;
990 break;
993 default:
994 break;
997 if (TREE_CODE_CLASS (code) == tcc_unary)
998 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
999 gimple_assign_rhs1 (assign),
1000 NULL_TREE, code);
1001 else
1002 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1003 gimple_assign_rhs1 (assign),
1004 gimple_assign_rhs2 (assign), code);
1007 /* Try to optimize
1008 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1009 style stmts into:
1010 _9 = { b_7, b_7, b_7, b_7 };
1011 a_5 = _9 + { 0, 3, 6, 9 };
1012 because vector splat operation is usually more efficient
1013 than piecewise initialization of the vector. */
1015 static void
1016 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1018 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1019 tree lhs = gimple_assign_lhs (stmt);
1020 tree rhs = gimple_assign_rhs1 (stmt);
1021 tree type = TREE_TYPE (rhs);
1022 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1023 bool all_same = true;
1024 constructor_elt *elt;
1025 tree *cst;
1026 gimple g;
1027 tree base = NULL_TREE;
1028 optab op;
1030 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1031 return;
1032 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1033 if (op == unknown_optab
1034 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1035 return;
1036 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1037 if (TREE_CODE (elt->value) != SSA_NAME
1038 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1039 return;
1040 else
1042 tree this_base = elt->value;
1043 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1044 all_same = false;
1045 for (j = 0; j < nelts + 1; j++)
1047 g = SSA_NAME_DEF_STMT (this_base);
1048 if (is_gimple_assign (g)
1049 && gimple_assign_rhs_code (g) == PLUS_EXPR
1050 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1051 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1052 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1053 this_base = gimple_assign_rhs1 (g);
1054 else
1055 break;
1057 if (i == 0)
1058 base = this_base;
1059 else if (this_base != base)
1060 return;
1062 if (all_same)
1063 return;
1064 cst = XALLOCAVEC (tree, nelts);
1065 for (i = 0; i < nelts; i++)
1067 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1068 cst[i] = build_zero_cst (TREE_TYPE (base));
1069 while (this_base != base)
1071 g = SSA_NAME_DEF_STMT (this_base);
1072 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1073 cst[i], gimple_assign_rhs2 (g));
1074 if (cst[i] == NULL_TREE
1075 || TREE_CODE (cst[i]) != INTEGER_CST
1076 || TREE_OVERFLOW (cst[i]))
1077 return;
1078 this_base = gimple_assign_rhs1 (g);
1081 for (i = 0; i < nelts; i++)
1082 CONSTRUCTOR_ELT (rhs, i)->value = base;
1083 g = gimple_build_assign (make_ssa_name (type), rhs);
1084 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1085 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1086 build_vector (type, cst));
1087 gsi_replace (gsi, g, false);
1090 /* Return a type for the widest vector mode whose components are of type
1091 TYPE, or NULL_TREE if none is found. */
1093 static tree
1094 type_for_widest_vector_mode (tree type, optab op)
1096 machine_mode inner_mode = TYPE_MODE (type);
1097 machine_mode best_mode = VOIDmode, mode;
1098 int best_nunits = 0;
1100 if (SCALAR_FLOAT_MODE_P (inner_mode))
1101 mode = MIN_MODE_VECTOR_FLOAT;
1102 else if (SCALAR_FRACT_MODE_P (inner_mode))
1103 mode = MIN_MODE_VECTOR_FRACT;
1104 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1105 mode = MIN_MODE_VECTOR_UFRACT;
1106 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1107 mode = MIN_MODE_VECTOR_ACCUM;
1108 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1109 mode = MIN_MODE_VECTOR_UACCUM;
1110 else
1111 mode = MIN_MODE_VECTOR_INT;
1113 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1114 if (GET_MODE_INNER (mode) == inner_mode
1115 && GET_MODE_NUNITS (mode) > best_nunits
1116 && optab_handler (op, mode) != CODE_FOR_nothing)
1117 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1119 if (best_mode == VOIDmode)
1120 return NULL_TREE;
1121 else
1122 return build_vector_type_for_mode (type, best_mode);
1126 /* Build a reference to the element of the vector VECT. Function
1127 returns either the element itself, either BIT_FIELD_REF, or an
1128 ARRAY_REF expression.
1130 GSI is required to insert temporary variables while building a
1131 refernece to the element of the vector VECT.
1133 PTMPVEC is a pointer to the temporary variable for caching
1134 purposes. In case when PTMPVEC is NULL new temporary variable
1135 will be created. */
1136 static tree
1137 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1139 tree vect_type, vect_elt_type;
1140 gimple asgn;
1141 tree tmpvec;
1142 tree arraytype;
1143 bool need_asgn = true;
1144 unsigned int elements;
1146 vect_type = TREE_TYPE (vect);
1147 vect_elt_type = TREE_TYPE (vect_type);
1148 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1150 if (TREE_CODE (idx) == INTEGER_CST)
1152 unsigned HOST_WIDE_INT index;
1154 /* Given that we're about to compute a binary modulus,
1155 we don't care about the high bits of the value. */
1156 index = TREE_INT_CST_LOW (idx);
1157 if (!tree_fits_uhwi_p (idx) || index >= elements)
1159 index &= elements - 1;
1160 idx = build_int_cst (TREE_TYPE (idx), index);
1163 /* When lowering a vector statement sequence do some easy
1164 simplification by looking through intermediate vector results. */
1165 if (TREE_CODE (vect) == SSA_NAME)
1167 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1168 if (is_gimple_assign (def_stmt)
1169 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1170 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1171 vect = gimple_assign_rhs1 (def_stmt);
1174 if (TREE_CODE (vect) == VECTOR_CST)
1175 return VECTOR_CST_ELT (vect, index);
1176 else if (TREE_CODE (vect) == CONSTRUCTOR
1177 && (CONSTRUCTOR_NELTS (vect) == 0
1178 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1179 != VECTOR_TYPE))
1181 if (index < CONSTRUCTOR_NELTS (vect))
1182 return CONSTRUCTOR_ELT (vect, index)->value;
1183 return build_zero_cst (vect_elt_type);
1185 else
1187 tree size = TYPE_SIZE (vect_elt_type);
1188 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1189 size);
1190 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1194 if (!ptmpvec)
1195 tmpvec = create_tmp_var (vect_type, "vectmp");
1196 else if (!*ptmpvec)
1197 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1198 else
1200 tmpvec = *ptmpvec;
1201 need_asgn = false;
1204 if (need_asgn)
1206 TREE_ADDRESSABLE (tmpvec) = 1;
1207 asgn = gimple_build_assign (tmpvec, vect);
1208 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1211 arraytype = build_array_type_nelts (vect_elt_type, elements);
1212 return build4 (ARRAY_REF, vect_elt_type,
1213 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1214 idx, NULL_TREE, NULL_TREE);
1217 /* Check if VEC_PERM_EXPR within the given setting is supported
1218 by hardware, or lower it piecewise.
1220 When VEC_PERM_EXPR has the same first and second operands:
1221 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1222 {v0[mask[0]], v0[mask[1]], ...}
1223 MASK and V0 must have the same number of elements.
1225 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1226 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1227 V0 and V1 must have the same type. MASK, V0, V1 must have the
1228 same number of arguments. */
1230 static void
1231 lower_vec_perm (gimple_stmt_iterator *gsi)
1233 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1234 tree mask = gimple_assign_rhs3 (stmt);
1235 tree vec0 = gimple_assign_rhs1 (stmt);
1236 tree vec1 = gimple_assign_rhs2 (stmt);
1237 tree vect_type = TREE_TYPE (vec0);
1238 tree mask_type = TREE_TYPE (mask);
1239 tree vect_elt_type = TREE_TYPE (vect_type);
1240 tree mask_elt_type = TREE_TYPE (mask_type);
1241 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1242 vec<constructor_elt, va_gc> *v;
1243 tree constr, t, si, i_val;
1244 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1245 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1246 location_t loc = gimple_location (gsi_stmt (*gsi));
1247 unsigned i;
1249 if (TREE_CODE (mask) == SSA_NAME)
1251 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1252 if (is_gimple_assign (def_stmt)
1253 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1254 mask = gimple_assign_rhs1 (def_stmt);
1257 if (TREE_CODE (mask) == VECTOR_CST)
1259 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1261 for (i = 0; i < elements; ++i)
1262 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1263 & (2 * elements - 1));
1265 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1267 gimple_assign_set_rhs3 (stmt, mask);
1268 update_stmt (stmt);
1269 return;
1272 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1273 return;
1275 warning_at (loc, OPT_Wvector_operation_performance,
1276 "vector shuffling operation will be expanded piecewise");
1278 vec_alloc (v, elements);
1279 for (i = 0; i < elements; i++)
1281 si = size_int (i);
1282 i_val = vector_element (gsi, mask, si, &masktmp);
1284 if (TREE_CODE (i_val) == INTEGER_CST)
1286 unsigned HOST_WIDE_INT index;
1288 index = TREE_INT_CST_LOW (i_val);
1289 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1290 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1292 if (two_operand_p && (index & elements) != 0)
1293 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1294 else
1295 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1297 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1298 true, GSI_SAME_STMT);
1300 else
1302 tree cond = NULL_TREE, v0_val;
1304 if (two_operand_p)
1306 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1307 build_int_cst (mask_elt_type, elements));
1308 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1309 true, GSI_SAME_STMT);
1312 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1313 build_int_cst (mask_elt_type, elements - 1));
1314 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1315 true, GSI_SAME_STMT);
1317 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1318 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1319 true, GSI_SAME_STMT);
1321 if (two_operand_p)
1323 tree v1_val;
1325 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1326 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1327 true, GSI_SAME_STMT);
1329 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1330 cond, build_zero_cst (mask_elt_type));
1331 cond = fold_build3 (COND_EXPR, vect_elt_type,
1332 cond, v0_val, v1_val);
1333 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1334 true, GSI_SAME_STMT);
1336 else
1337 t = v0_val;
1340 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1343 constr = build_constructor (vect_type, v);
1344 gimple_assign_set_rhs_from_tree (gsi, constr);
1345 update_stmt (gsi_stmt (*gsi));
1348 /* Return type in which CODE operation with optab OP can be
1349 computed. */
1351 static tree
1352 get_compute_type (enum tree_code code, optab op, tree type)
1354 /* For very wide vectors, try using a smaller vector mode. */
1355 tree compute_type = type;
1356 if (op
1357 && (!VECTOR_MODE_P (TYPE_MODE (type))
1358 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1360 tree vector_compute_type
1361 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1362 if (vector_compute_type != NULL_TREE
1363 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1364 < TYPE_VECTOR_SUBPARTS (compute_type))
1365 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1366 != CODE_FOR_nothing))
1367 compute_type = vector_compute_type;
1370 /* If we are breaking a BLKmode vector into smaller pieces,
1371 type_for_widest_vector_mode has already looked into the optab,
1372 so skip these checks. */
1373 if (compute_type == type)
1375 machine_mode compute_mode = TYPE_MODE (compute_type);
1376 if (VECTOR_MODE_P (compute_mode))
1378 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1379 return compute_type;
1380 if (code == MULT_HIGHPART_EXPR
1381 && can_mult_highpart_p (compute_mode,
1382 TYPE_UNSIGNED (compute_type)))
1383 return compute_type;
1385 /* There is no operation in hardware, so fall back to scalars. */
1386 compute_type = TREE_TYPE (type);
1389 return compute_type;
1392 /* Helper function of expand_vector_operations_1. Return number of
1393 vector elements for vector types or 1 for other types. */
1395 static inline int
1396 count_type_subparts (tree type)
1398 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1401 /* Process one statement. If we identify a vector operation, expand it. */
1403 static void
1404 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1406 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1407 enum tree_code code;
1408 optab op = unknown_optab;
1409 enum gimple_rhs_class rhs_class;
1410 tree new_rhs;
1412 /* Only consider code == GIMPLE_ASSIGN. */
1413 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1414 if (!stmt)
1415 return;
1417 code = gimple_assign_rhs_code (stmt);
1418 rhs_class = get_gimple_rhs_class (code);
1419 lhs = gimple_assign_lhs (stmt);
1421 if (code == VEC_PERM_EXPR)
1423 lower_vec_perm (gsi);
1424 return;
1427 if (code == VEC_COND_EXPR)
1429 expand_vector_condition (gsi);
1430 return;
1433 if (code == CONSTRUCTOR
1434 && TREE_CODE (lhs) == SSA_NAME
1435 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1436 && !gimple_clobber_p (stmt)
1437 && optimize)
1439 optimize_vector_constructor (gsi);
1440 return;
1443 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1444 return;
1446 rhs1 = gimple_assign_rhs1 (stmt);
1447 type = gimple_expr_type (stmt);
1448 if (rhs_class == GIMPLE_BINARY_RHS)
1449 rhs2 = gimple_assign_rhs2 (stmt);
1451 if (TREE_CODE (type) != VECTOR_TYPE)
1452 return;
1454 if (CONVERT_EXPR_CODE_P (code)
1455 || code == FLOAT_EXPR
1456 || code == FIX_TRUNC_EXPR
1457 || code == VIEW_CONVERT_EXPR)
1458 return;
1460 /* The signedness is determined from input argument. */
1461 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1462 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1463 type = TREE_TYPE (rhs1);
1465 /* For widening/narrowing vector operations, the relevant type is of the
1466 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1467 calculated in the same way above. */
1468 if (code == WIDEN_SUM_EXPR
1469 || code == VEC_WIDEN_MULT_HI_EXPR
1470 || code == VEC_WIDEN_MULT_LO_EXPR
1471 || code == VEC_WIDEN_MULT_EVEN_EXPR
1472 || code == VEC_WIDEN_MULT_ODD_EXPR
1473 || code == VEC_UNPACK_HI_EXPR
1474 || code == VEC_UNPACK_LO_EXPR
1475 || code == VEC_PACK_TRUNC_EXPR
1476 || code == VEC_PACK_SAT_EXPR
1477 || code == VEC_PACK_FIX_TRUNC_EXPR
1478 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1479 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1480 type = TREE_TYPE (rhs1);
1482 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1483 scalar */
1484 if (code == LSHIFT_EXPR
1485 || code == RSHIFT_EXPR
1486 || code == LROTATE_EXPR
1487 || code == RROTATE_EXPR)
1489 optab opv;
1491 /* Check whether we have vector <op> {x,x,x,x} where x
1492 could be a scalar variable or a constant. Transform
1493 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1494 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1496 tree first;
1497 gimple def_stmt;
1499 if ((TREE_CODE (rhs2) == VECTOR_CST
1500 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1501 || (TREE_CODE (rhs2) == SSA_NAME
1502 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1503 && gimple_assign_single_p (def_stmt)
1504 && (first = uniform_vector_p
1505 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1507 gimple_assign_set_rhs2 (stmt, first);
1508 update_stmt (stmt);
1509 rhs2 = first;
1513 opv = optab_for_tree_code (code, type, optab_vector);
1514 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1515 op = opv;
1516 else
1518 op = optab_for_tree_code (code, type, optab_scalar);
1520 compute_type = get_compute_type (code, op, type);
1521 if (compute_type == type)
1522 return;
1523 /* The rtl expander will expand vector/scalar as vector/vector
1524 if necessary. Pick one with wider vector type. */
1525 tree compute_vtype = get_compute_type (code, opv, type);
1526 if (count_type_subparts (compute_vtype)
1527 > count_type_subparts (compute_type))
1529 compute_type = compute_vtype;
1530 op = opv;
1534 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1536 if (compute_type == NULL_TREE)
1537 compute_type = get_compute_type (code, op, type);
1538 if (compute_type == type)
1539 return;
1540 /* Before splitting vector rotates into scalar rotates,
1541 see if we can't use vector shifts and BIT_IOR_EXPR
1542 instead. For vector by vector rotates we'd also
1543 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
1544 for now, fold doesn't seem to create such rotates anyway. */
1545 if (compute_type == TREE_TYPE (type)
1546 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1548 optab oplv = vashl_optab, opl = ashl_optab;
1549 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1550 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1551 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1552 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1553 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1554 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1555 /* The rtl expander will expand vector/scalar as vector/vector
1556 if necessary. Pick one with wider vector type. */
1557 if (count_type_subparts (compute_lvtype)
1558 > count_type_subparts (compute_ltype))
1560 compute_ltype = compute_lvtype;
1561 opl = oplv;
1563 if (count_type_subparts (compute_rvtype)
1564 > count_type_subparts (compute_rtype))
1566 compute_rtype = compute_rvtype;
1567 opr = oprv;
1569 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1570 BIT_IOR_EXPR. */
1571 compute_type = compute_ltype;
1572 if (count_type_subparts (compute_type)
1573 > count_type_subparts (compute_rtype))
1574 compute_type = compute_rtype;
1575 if (count_type_subparts (compute_type)
1576 > count_type_subparts (compute_otype))
1577 compute_type = compute_otype;
1578 /* Verify all 3 operations can be performed in that type. */
1579 if (compute_type != TREE_TYPE (type))
1581 if (optab_handler (opl, TYPE_MODE (compute_type))
1582 == CODE_FOR_nothing
1583 || optab_handler (opr, TYPE_MODE (compute_type))
1584 == CODE_FOR_nothing
1585 || optab_handler (opo, TYPE_MODE (compute_type))
1586 == CODE_FOR_nothing)
1587 compute_type = TREE_TYPE (type);
1592 else
1593 op = optab_for_tree_code (code, type, optab_default);
1595 /* Optabs will try converting a negation into a subtraction, so
1596 look for it as well. TODO: negation of floating-point vectors
1597 might be turned into an exclusive OR toggling the sign bit. */
1598 if (op == unknown_optab
1599 && code == NEGATE_EXPR
1600 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1601 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1603 if (compute_type == NULL_TREE)
1604 compute_type = get_compute_type (code, op, type);
1605 if (compute_type == type)
1606 return;
1608 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1610 /* Leave expression untouched for later expansion. */
1611 if (new_rhs == NULL_TREE)
1612 return;
1614 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1615 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1616 new_rhs);
1618 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1619 way to do it is change expand_vector_operation and its callees to
1620 return a tree_code, RHS1 and RHS2 instead of a tree. */
1621 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1622 update_stmt (gsi_stmt (*gsi));
1625 /* Use this to lower vector operations introduced by the vectorizer,
1626 if it may need the bit-twiddling tricks implemented in this file. */
1628 static unsigned int
1629 expand_vector_operations (void)
1631 gimple_stmt_iterator gsi;
1632 basic_block bb;
1633 bool cfg_changed = false;
1635 FOR_EACH_BB_FN (bb, cfun)
1637 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1639 expand_vector_operations_1 (&gsi);
1640 /* ??? If we do not cleanup EH then we will ICE in
1641 verification. But in reality we have created wrong-code
1642 as we did not properly transition EH info and edges to
1643 the piecewise computations. */
1644 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1645 && gimple_purge_dead_eh_edges (bb))
1646 cfg_changed = true;
1650 return cfg_changed ? TODO_cleanup_cfg : 0;
1653 namespace {
1655 const pass_data pass_data_lower_vector =
1657 GIMPLE_PASS, /* type */
1658 "veclower", /* name */
1659 OPTGROUP_VEC, /* optinfo_flags */
1660 TV_NONE, /* tv_id */
1661 PROP_cfg, /* properties_required */
1662 PROP_gimple_lvec, /* properties_provided */
1663 0, /* properties_destroyed */
1664 0, /* todo_flags_start */
1665 ( TODO_update_ssa
1666 | TODO_cleanup_cfg ), /* todo_flags_finish */
1669 class pass_lower_vector : public gimple_opt_pass
1671 public:
1672 pass_lower_vector (gcc::context *ctxt)
1673 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1676 /* opt_pass methods: */
1677 virtual bool gate (function *fun)
1679 return !(fun->curr_properties & PROP_gimple_lvec);
1682 virtual unsigned int execute (function *)
1684 return expand_vector_operations ();
1687 }; // class pass_lower_vector
1689 } // anon namespace
1691 gimple_opt_pass *
1692 make_pass_lower_vector (gcc::context *ctxt)
1694 return new pass_lower_vector (ctxt);
1697 namespace {
1699 const pass_data pass_data_lower_vector_ssa =
1701 GIMPLE_PASS, /* type */
1702 "veclower2", /* name */
1703 OPTGROUP_VEC, /* optinfo_flags */
1704 TV_NONE, /* tv_id */
1705 PROP_cfg, /* properties_required */
1706 PROP_gimple_lvec, /* properties_provided */
1707 0, /* properties_destroyed */
1708 0, /* todo_flags_start */
1709 ( TODO_update_ssa
1710 | TODO_cleanup_cfg ), /* todo_flags_finish */
1713 class pass_lower_vector_ssa : public gimple_opt_pass
1715 public:
1716 pass_lower_vector_ssa (gcc::context *ctxt)
1717 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1720 /* opt_pass methods: */
1721 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1722 virtual unsigned int execute (function *)
1724 return expand_vector_operations ();
1727 }; // class pass_lower_vector_ssa
1729 } // anon namespace
1731 gimple_opt_pass *
1732 make_pass_lower_vector_ssa (gcc::context *ctxt)
1734 return new pass_lower_vector_ssa (ctxt);
1737 #include "gt-tree-vect-generic.h"