PR middle-end/66633
[official-gcc.git] / gcc / tree-vect-generic.c
blob 1d20842827cb681d60678f038ad3e73578da8b09
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "symtab.h"
25 #include "options.h"
26 #include "tree.h"
27 #include "fold-const.h"
28 #include "stor-layout.h"
29 #include "tm.h"
30 #include "langhooks.h"
31 #include "predict.h"
32 #include "hard-reg-set.h"
33 #include "function.h"
34 #include "dominance.h"
35 #include "cfg.h"
36 #include "basic-block.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "tree-eh.h"
40 #include "gimple-expr.h"
41 #include "gimple.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "gimple-ssa.h"
45 #include "tree-cfg.h"
46 #include "stringpool.h"
47 #include "tree-ssanames.h"
48 #include "tree-iterator.h"
49 #include "tree-pass.h"
50 #include "flags.h"
51 #include "diagnostic.h"
52 #include "target.h"
54 /* Need to include rtl.h, expr.h, etc. for optabs. */
55 #include "rtl.h"
56 #include "insn-config.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "calls.h"
61 #include "emit-rtl.h"
62 #include "varasm.h"
63 #include "stmt.h"
64 #include "expr.h"
65 #include "insn-codes.h"
66 #include "optabs.h"
69 static void expand_vector_operations_1 (gimple_stmt_iterator *);
72 /* Build a constant of type TYPE, made of VALUE's bits replicated
73 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
74 static tree
75 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
77 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
78 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
79 / HOST_BITS_PER_WIDE_INT;
80 unsigned HOST_WIDE_INT low, mask;
81 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
82 int i;
84 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
86 if (width == HOST_BITS_PER_WIDE_INT)
87 low = value;
88 else
90 mask = ((HOST_WIDE_INT)1 << width) - 1;
91 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
94 for (i = 0; i < n; i++)
95 a[i] = low;
97 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
98 return wide_int_to_tree
99 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
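/* For example, assuming a 32-bit TYPE and an 8-bit INNER_TYPE, a VALUE of
   0x7f is replicated into the constant 0x7f7f7f7f: MASK is 0xff, so LOW
   becomes (~0 / 0xff) * 0x7f, which repeats the byte 0x7f across the whole
   word before the result is truncated to TYPE's precision.  */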
102 static GTY(()) tree vector_inner_type;
103 static GTY(()) tree vector_last_type;
104 static GTY(()) int vector_last_nunits;
106 /* Return a suitable vector type made of SUBPARTS units, each of mode
107 "word_mode" (the global variable). */
108 static tree
109 build_word_mode_vector_type (int nunits)
111 if (!vector_inner_type)
112 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
113 else if (vector_last_nunits == nunits)
115 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
116 return vector_last_type;
119 /* We build a new type, but we canonicalize it nevertheless,
120 because it still saves some memory. */
121 vector_last_nunits = nunits;
122 vector_last_type = type_hash_canon (nunits,
123 build_vector_type (vector_inner_type,
124 nunits));
125 return vector_last_type;
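/* For example, on a 32-bit target (word_mode == SImode) a request for four
   units builds a vector of four word-sized integers; the single-entry cache
   above means repeated requests for the same unit count reuse the
   previously built and canonicalized type.  */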
128 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
129 tree, tree, tree, tree, tree, enum tree_code);
131 static inline tree
132 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
133 tree t, tree bitsize, tree bitpos)
135 if (bitpos)
136 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
137 else
138 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
141 static tree
142 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
143 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
144 enum tree_code code)
146 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
147 return gimplify_build1 (gsi, code, inner_type, a);
150 static tree
151 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
152 tree bitpos, tree bitsize, enum tree_code code)
154 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
155 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
156 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
157 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
158 return gimplify_build2 (gsi, code, inner_type, a, b);
161 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
163 INNER_TYPE is the type of A and B elements.
165 The returned expression is of signed integer type with the
166 size equal to the size of INNER_TYPE. */
167 static tree
168 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
169 tree bitpos, tree bitsize, enum tree_code code)
171 tree comp_type;
173 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
174 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
176 comp_type = build_nonstandard_integer_type
177 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
179 return gimplify_build3 (gsi, COND_EXPR, comp_type,
180 fold_build2 (code, boolean_type_node, a, b),
181 build_int_cst (comp_type, -1),
182 build_int_cst (comp_type, 0));
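/* For example, comparing two single-precision float elements with LT_EXPR
   yields a 32-bit signed integer element that is -1 (all bits set) when the
   comparison holds and 0 otherwise, mimicking the element-wise mask a
   hardware vector comparison would produce.  */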
185 /* Expand vector addition to scalars. This does bit twiddling
186 in order to increase parallelism:
188 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
189 (a ^ b) & 0x80808080
191 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
192 (a ^ ~b) & 0x80808080
194 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
196 This optimization should be done only if 4 vector items or more
197 fit into a word. */
198 static tree
199 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
200 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
201 enum tree_code code)
203 tree inner_type = TREE_TYPE (TREE_TYPE (a));
204 unsigned HOST_WIDE_INT max;
205 tree low_bits, high_bits, a_low, b_low, result_low, signs;
207 max = GET_MODE_MASK (TYPE_MODE (inner_type));
208 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
209 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
211 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
212 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
214 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
215 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
216 if (code == PLUS_EXPR)
217 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
218 else
220 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
221 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
224 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
225 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
226 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
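/* A concrete illustration, assuming 8-bit elements packed into a 32-bit
   word: for a = 0x01ff807f and b = 0x01018001 (PLUS_EXPR),
     a_low  = a & 0x7f7f7f7f          = 0x017f007f
     b_low  = b & 0x7f7f7f7f          = 0x01010001
     signs  = (a ^ b) & 0x80808080    = 0x00800000
     result = (a_low + b_low) ^ signs = 0x02000080
   i.e. the byte-wise sums {0x02, 0x00, 0x00, 0x80}, with every carry
   confined to its own byte.  */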
229 static tree
230 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
231 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
232 tree bitsize ATTRIBUTE_UNUSED,
233 enum tree_code code ATTRIBUTE_UNUSED)
235 tree inner_type = TREE_TYPE (TREE_TYPE (b));
236 HOST_WIDE_INT max;
237 tree low_bits, high_bits, b_low, result_low, signs;
239 max = GET_MODE_MASK (TYPE_MODE (inner_type));
240 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
241 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
243 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
245 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
246 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
247 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
248 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
249 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
252 /* Expand a vector operation to scalars, by using many operations
253 whose type is the vector type's inner type. */
254 static tree
255 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
256 tree type, tree inner_type,
257 tree a, tree b, enum tree_code code)
259 vec<constructor_elt, va_gc> *v;
260 tree part_width = TYPE_SIZE (inner_type);
261 tree index = bitsize_int (0);
262 int nunits = TYPE_VECTOR_SUBPARTS (type);
263 int delta = tree_to_uhwi (part_width)
264 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
265 int i;
266 location_t loc = gimple_location (gsi_stmt (*gsi));
268 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
269 warning_at (loc, OPT_Wvector_operation_performance,
270 "vector operation will be expanded piecewise");
271 else
272 warning_at (loc, OPT_Wvector_operation_performance,
273 "vector operation will be expanded in parallel");
275 vec_alloc (v, (nunits + delta - 1) / delta);
276 for (i = 0; i < nunits;
277 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
279 tree result = f (gsi, inner_type, a, b, index, part_width, code);
280 constructor_elt ce = {NULL_TREE, result};
281 v->quick_push (ce);
284 return build_constructor (type, v);
287 /* Expand a vector operation to scalars with the freedom to use
288 a scalar integer type, or to use a different size for the items
289 in the vector type. */
290 static tree
291 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
292 tree a, tree b,
293 enum tree_code code)
295 tree result, compute_type;
296 machine_mode mode;
297 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
298 location_t loc = gimple_location (gsi_stmt (*gsi));
300 /* We have three strategies. If the type is already correct, just do
301 the operation an element at a time. Else, if the vector is wider than
302 one word, do it a word at a time; finally, if the vector is smaller
303 than one word, do it as a scalar. */
304 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
305 return expand_vector_piecewise (gsi, f,
306 type, TREE_TYPE (type),
307 a, b, code);
308 else if (n_words > 1)
310 tree word_type = build_word_mode_vector_type (n_words);
311 result = expand_vector_piecewise (gsi, f,
312 word_type, TREE_TYPE (word_type),
313 a, b, code);
314 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
315 GSI_SAME_STMT);
317 else
319 /* Use a single scalar operation with a mode no wider than word_mode. */
320 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
321 compute_type = lang_hooks.types.type_for_mode (mode, 1);
322 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
323 warning_at (loc, OPT_Wvector_operation_performance,
324 "vector operation will be expanded with a "
325 "single scalar operation");
328 return result;
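/* For instance, on a 64-bit target with no vector hardware, a bitwise AND
   of two 16-byte vectors is lowered by the middle case above to two DImode
   ANDs, while the same operation on a 4-byte vector falls through to the
   final case and becomes a single SImode scalar AND.  */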
331 /* Expand a vector operation to scalars; for integer types we can use
332 special bit twiddling tricks to do the sums a word at a time, using
333 function F_PARALLEL instead of F. These tricks are done only if
334 they can process at least four items, that is, only if the vector
335 holds at least four items and if a word can hold four items. */
336 static tree
337 expand_vector_addition (gimple_stmt_iterator *gsi,
338 elem_op_func f, elem_op_func f_parallel,
339 tree type, tree a, tree b, enum tree_code code)
341 int parts_per_word = UNITS_PER_WORD
342 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
344 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
345 && parts_per_word >= 4
346 && TYPE_VECTOR_SUBPARTS (type) >= 4)
347 return expand_vector_parallel (gsi, f_parallel,
348 type, a, b, code);
349 else
350 return expand_vector_piecewise (gsi, f,
351 type, TREE_TYPE (type),
352 a, b, code);
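/* For example, with 1-byte elements and UNITS_PER_WORD == 8, parts_per_word
   is 8, so adding vectors of four or more such elements takes the parallel
   (word-at-a-time) path; with 4-byte elements parts_per_word is only 2 and
   the addition is always expanded piecewise.  */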
355 /* Try to expand vector comparison expression OP0 CODE OP1 by
356 querying the optab whether the following expression
357 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
358 can be expanded. */
359 static tree
360 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
361 tree op1, enum tree_code code)
363 tree t;
364 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
365 t = expand_vector_piecewise (gsi, do_compare, type,
366 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
367 else
368 t = NULL_TREE;
370 return t;
373 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
374 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
375 the result if successful, otherwise return NULL_TREE. */
376 static tree
377 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
379 optab op;
380 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
381 bool scalar_shift = true;
383 for (i = 1; i < nunits; i++)
385 if (shiftcnts[i] != shiftcnts[0])
386 scalar_shift = false;
389 if (scalar_shift && shiftcnts[0] == 0)
390 return op0;
392 if (scalar_shift)
394 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
395 if (op != unknown_optab
396 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
397 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
398 build_int_cst (NULL_TREE, shiftcnts[0]));
401 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
402 if (op != unknown_optab
403 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
405 tree *vec = XALLOCAVEC (tree, nunits);
406 for (i = 0; i < nunits; i++)
407 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
408 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
409 build_vector (type, vec));
412 return NULL_TREE;
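/* For example, if every element is shifted by the same amount, the scalar
   shift optab is tried first with a single INTEGER_CST count; per-element
   counts such as {3, 1, 2, 0} need the vector shift optab and a VECTOR_CST
   of counts.  If no suitable pattern exists, NULL_TREE tells the caller to
   expand the operation some other way.  */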
415 /* Try to expand integer vector division by constant using
416 widening multiply, shifts and additions. */
417 static tree
418 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
419 tree op1, enum tree_code code)
421 bool use_pow2 = true;
422 bool has_vector_shift = true;
423 int mode = -1, this_mode;
424 int pre_shift = -1, post_shift;
425 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
426 int *shifts = XALLOCAVEC (int, nunits * 4);
427 int *pre_shifts = shifts + nunits;
428 int *post_shifts = pre_shifts + nunits;
429 int *shift_temps = post_shifts + nunits;
430 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
431 int prec = TYPE_PRECISION (TREE_TYPE (type));
432 int dummy_int;
433 unsigned int i;
434 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
435 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
436 tree *vec;
437 tree cur_op, mulcst, tem;
438 optab op;
440 if (prec > HOST_BITS_PER_WIDE_INT)
441 return NULL_TREE;
443 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
444 if (op == unknown_optab
445 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
446 has_vector_shift = false;
448 /* Analysis phase. Determine whether all op1 elements are powers
449 of two and whether the operation can be expanded using shifts (or,
450 for remainder, using masking). Additionally compute the multiplicative
451 constants and pre- and post-shifts if the division is to be expanded
452 using widening or high-part multiplication plus shifts. */
453 for (i = 0; i < nunits; i++)
455 tree cst = VECTOR_CST_ELT (op1, i);
456 unsigned HOST_WIDE_INT ml;
458 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
459 return NULL_TREE;
460 pre_shifts[i] = 0;
461 post_shifts[i] = 0;
462 mulc[i] = 0;
463 if (use_pow2
464 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
465 use_pow2 = false;
466 if (use_pow2)
468 shifts[i] = tree_log2 (cst);
469 if (shifts[i] != shifts[0]
470 && code == TRUNC_DIV_EXPR
471 && !has_vector_shift)
472 use_pow2 = false;
474 if (mode == -2)
475 continue;
476 if (sign_p == UNSIGNED)
478 unsigned HOST_WIDE_INT mh;
479 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
481 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
482 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
483 return NULL_TREE;
485 if (d <= 1)
487 mode = -2;
488 continue;
491 /* Find a suitable multiplier and right shift count
492 to replace the division by D. */
493 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
495 /* If the suggested multiplier is more than SIZE bits, we can
496 do better for even divisors, using an initial right shift. */
497 if ((mh != 0 && (d & 1) == 0)
498 || (!has_vector_shift && pre_shift != -1))
500 if (has_vector_shift)
501 pre_shift = floor_log2 (d & -d);
502 else if (pre_shift == -1)
504 unsigned int j;
505 for (j = 0; j < nunits; j++)
507 tree cst2 = VECTOR_CST_ELT (op1, j);
508 unsigned HOST_WIDE_INT d2;
509 int this_pre_shift;
511 if (!tree_fits_uhwi_p (cst2))
512 return NULL_TREE;
513 d2 = tree_to_uhwi (cst2) & mask;
514 if (d2 == 0)
515 return NULL_TREE;
516 this_pre_shift = floor_log2 (d2 & -d2);
517 if (pre_shift == -1 || this_pre_shift < pre_shift)
518 pre_shift = this_pre_shift;
520 if (i != 0 && pre_shift != 0)
522 /* Restart. */
523 i = -1U;
524 mode = -1;
525 continue;
528 if (pre_shift != 0)
530 if ((d >> pre_shift) <= 1)
532 mode = -2;
533 continue;
535 mh = choose_multiplier (d >> pre_shift, prec,
536 prec - pre_shift,
537 &ml, &post_shift, &dummy_int);
538 gcc_assert (!mh);
539 pre_shifts[i] = pre_shift;
542 if (!mh)
543 this_mode = 0;
544 else
545 this_mode = 1;
547 else
549 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
550 unsigned HOST_WIDE_INT abs_d;
552 if (d == -1)
553 return NULL_TREE;
555 /* Since d might be INT_MIN, we have to cast to
556 unsigned HOST_WIDE_INT before negating to avoid
557 undefined signed overflow. */
558 abs_d = (d >= 0
559 ? (unsigned HOST_WIDE_INT) d
560 : - (unsigned HOST_WIDE_INT) d);
562 /* n rem d = n rem -d */
563 if (code == TRUNC_MOD_EXPR && d < 0)
564 d = abs_d;
565 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
567 /* This case is not handled correctly below. */
568 mode = -2;
569 continue;
571 if (abs_d <= 1)
573 mode = -2;
574 continue;
577 choose_multiplier (abs_d, prec, prec - 1, &ml,
578 &post_shift, &dummy_int);
579 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
581 this_mode = 4 + (d < 0);
582 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
584 else
585 this_mode = 2 + (d < 0);
587 mulc[i] = ml;
588 post_shifts[i] = post_shift;
589 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
590 || post_shift >= prec
591 || pre_shifts[i] >= prec)
592 this_mode = -2;
594 if (i == 0)
595 mode = this_mode;
596 else if (mode != this_mode)
597 mode = -2;
600 vec = XALLOCAVEC (tree, nunits);
602 if (use_pow2)
604 tree addend = NULL_TREE;
605 if (sign_p == SIGNED)
607 tree uns_type;
609 /* Both division and remainder sequences need
610 op0 < 0 ? mask : 0 computed. It can be computed either as
611 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
612 if none of the shifts is 0, or as the conditional. */
613 for (i = 0; i < nunits; i++)
614 if (shifts[i] == 0)
615 break;
616 uns_type
617 = build_vector_type (build_nonstandard_integer_type (prec, 1),
618 nunits);
619 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
621 for (i = 0; i < nunits; i++)
622 shift_temps[i] = prec - 1;
623 cur_op = add_rshift (gsi, type, op0, shift_temps);
624 if (cur_op != NULL_TREE)
626 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
627 uns_type, cur_op);
628 for (i = 0; i < nunits; i++)
629 shift_temps[i] = prec - shifts[i];
630 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
631 if (cur_op != NULL_TREE)
632 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
633 type, cur_op);
636 if (addend == NULL_TREE
637 && expand_vec_cond_expr_p (type, type))
639 tree zero, cst, cond;
640 gimple stmt;
642 zero = build_zero_cst (type);
643 cond = build2 (LT_EXPR, type, op0, zero);
644 for (i = 0; i < nunits; i++)
645 vec[i] = build_int_cst (TREE_TYPE (type),
646 ((unsigned HOST_WIDE_INT) 1
647 << shifts[i]) - 1);
648 cst = build_vector (type, vec);
649 addend = make_ssa_name (type);
650 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
651 cst, zero);
652 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
655 if (code == TRUNC_DIV_EXPR)
657 if (sign_p == UNSIGNED)
659 /* q = op0 >> shift; */
660 cur_op = add_rshift (gsi, type, op0, shifts);
661 if (cur_op != NULL_TREE)
662 return cur_op;
664 else if (addend != NULL_TREE)
666 /* t1 = op0 + addend;
667 q = t1 >> shift; */
668 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
669 if (op != unknown_optab
670 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
672 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
673 cur_op = add_rshift (gsi, type, cur_op, shifts);
674 if (cur_op != NULL_TREE)
675 return cur_op;
679 else
681 tree mask;
682 for (i = 0; i < nunits; i++)
683 vec[i] = build_int_cst (TREE_TYPE (type),
684 ((unsigned HOST_WIDE_INT) 1
685 << shifts[i]) - 1);
686 mask = build_vector (type, vec);
687 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
688 if (op != unknown_optab
689 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
691 if (sign_p == UNSIGNED)
692 /* r = op0 & mask; */
693 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
694 else if (addend != NULL_TREE)
696 /* t1 = op0 + addend;
697 t2 = t1 & mask;
698 r = t2 - addend; */
699 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
700 if (op != unknown_optab
701 && optab_handler (op, TYPE_MODE (type))
702 != CODE_FOR_nothing)
704 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
705 addend);
706 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
707 cur_op, mask);
708 op = optab_for_tree_code (MINUS_EXPR, type,
709 optab_default);
710 if (op != unknown_optab
711 && optab_handler (op, TYPE_MODE (type))
712 != CODE_FOR_nothing)
713 return gimplify_build2 (gsi, MINUS_EXPR, type,
714 cur_op, addend);
721 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
722 return NULL_TREE;
724 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
725 return NULL_TREE;
727 cur_op = op0;
729 switch (mode)
731 case 0:
732 gcc_assert (sign_p == UNSIGNED);
733 /* t1 = oprnd0 >> pre_shift;
734 t2 = t1 h* ml;
735 q = t2 >> post_shift; */
736 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
737 if (cur_op == NULL_TREE)
738 return NULL_TREE;
739 break;
740 case 1:
741 gcc_assert (sign_p == UNSIGNED);
742 for (i = 0; i < nunits; i++)
744 shift_temps[i] = 1;
745 post_shifts[i]--;
747 break;
748 case 2:
749 case 3:
750 case 4:
751 case 5:
752 gcc_assert (sign_p == SIGNED);
753 for (i = 0; i < nunits; i++)
754 shift_temps[i] = prec - 1;
755 break;
756 default:
757 return NULL_TREE;
760 for (i = 0; i < nunits; i++)
761 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
762 mulcst = build_vector (type, vec);
764 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
766 switch (mode)
768 case 0:
769 /* t1 = oprnd0 >> pre_shift;
770 t2 = t1 h* ml;
771 q = t2 >> post_shift; */
772 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
773 break;
774 case 1:
775 /* t1 = oprnd0 h* ml;
776 t2 = oprnd0 - t1;
777 t3 = t2 >> 1;
778 t4 = t1 + t3;
779 q = t4 >> (post_shift - 1); */
780 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
781 if (op == unknown_optab
782 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
783 return NULL_TREE;
784 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
785 tem = add_rshift (gsi, type, tem, shift_temps);
786 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
787 if (op == unknown_optab
788 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
789 return NULL_TREE;
790 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
791 cur_op = add_rshift (gsi, type, tem, post_shifts);
792 if (cur_op == NULL_TREE)
793 return NULL_TREE;
794 break;
795 case 2:
796 case 3:
797 case 4:
798 case 5:
799 /* t1 = oprnd0 h* ml;
800 t2 = t1; [ iff (mode & 2) != 0 ]
801 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
802 t3 = t2 >> post_shift;
803 t4 = oprnd0 >> (prec - 1);
804 q = t3 - t4; [ iff (mode & 1) == 0 ]
805 q = t4 - t3; [ iff (mode & 1) != 0 ] */
806 if ((mode & 2) == 0)
808 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
809 if (op == unknown_optab
810 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
811 return NULL_TREE;
812 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
814 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
815 if (cur_op == NULL_TREE)
816 return NULL_TREE;
817 tem = add_rshift (gsi, type, op0, shift_temps);
818 if (tem == NULL_TREE)
819 return NULL_TREE;
820 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
821 if (op == unknown_optab
822 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
823 return NULL_TREE;
824 if ((mode & 1) == 0)
825 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
826 else
827 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
828 break;
829 default:
830 gcc_unreachable ();
833 if (code == TRUNC_DIV_EXPR)
834 return cur_op;
836 /* We divided. Now finish by:
837 t1 = q * oprnd1;
838 r = oprnd0 - t1; */
839 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
840 if (op == unknown_optab
841 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
842 return NULL_TREE;
843 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
844 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
845 if (op == unknown_optab
846 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
847 return NULL_TREE;
848 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
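/* An illustrative case, assuming unsigned 8-bit elements all divided by 3:
   choose_multiplier picks ml = 0xab with post_shift = 1 and no pre-shift,
   so each element x is expanded as
     t1 = x h* 0xab;
     q  = t1 >> 1;
   i.e. q = (x * 171) >> 9, which equals x / 3 for every x in [0, 255].
   For TRUNC_MOD_EXPR the tail above then computes r = x - q * 3.  */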
851 /* Expand a vector condition to scalars, by using many conditions
852 on the vector's elements. */
853 static void
854 expand_vector_condition (gimple_stmt_iterator *gsi)
856 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
857 tree type = gimple_expr_type (stmt);
858 tree a = gimple_assign_rhs1 (stmt);
859 tree a1 = a;
860 tree a2;
861 bool a_is_comparison = false;
862 tree b = gimple_assign_rhs2 (stmt);
863 tree c = gimple_assign_rhs3 (stmt);
864 vec<constructor_elt, va_gc> *v;
865 tree constr;
866 tree inner_type = TREE_TYPE (type);
867 tree cond_type = TREE_TYPE (TREE_TYPE (a));
868 tree comp_inner_type = cond_type;
869 tree width = TYPE_SIZE (inner_type);
870 tree index = bitsize_int (0);
871 int nunits = TYPE_VECTOR_SUBPARTS (type);
872 int i;
873 location_t loc = gimple_location (gsi_stmt (*gsi));
875 if (!is_gimple_val (a))
877 gcc_assert (COMPARISON_CLASS_P (a));
878 a_is_comparison = true;
879 a1 = TREE_OPERAND (a, 0);
880 a2 = TREE_OPERAND (a, 1);
881 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
884 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
885 return;
887 /* TODO: try to find a smaller vector type. */
889 warning_at (loc, OPT_Wvector_operation_performance,
890 "vector condition will be expanded piecewise");
892 vec_alloc (v, nunits);
893 for (i = 0; i < nunits;
894 i++, index = int_const_binop (PLUS_EXPR, index, width))
896 tree aa, result;
897 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
898 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
899 if (a_is_comparison)
901 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
902 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
903 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
905 else
906 aa = tree_vec_extract (gsi, cond_type, a, width, index);
907 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
908 constructor_elt ce = {NULL_TREE, result};
909 v->quick_push (ce);
912 constr = build_constructor (type, v);
913 gimple_assign_set_rhs_from_tree (gsi, constr);
914 update_stmt (gsi_stmt (*gsi));
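/* For example, a V4SI statement x_1 = VEC_COND_EXPR <a_2 < b_3, c_4, d_5>
   with no hardware support is rewritten as four scalar comparisons feeding
   four scalar COND_EXPRs, one per element, collected into a CONSTRUCTOR
   that replaces the right-hand side of the assignment.  */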
917 static tree
918 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
919 gassign *assign, enum tree_code code)
921 machine_mode compute_mode = TYPE_MODE (compute_type);
923 /* If the compute mode is not a vector mode (hence we are not decomposing
924 a BLKmode vector to smaller, hardware-supported vectors), we may want
925 to expand the operations in parallel. */
926 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
927 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
928 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
929 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
930 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
931 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
932 switch (code)
934 case PLUS_EXPR:
935 case MINUS_EXPR:
936 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
937 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
938 gimple_assign_rhs1 (assign),
939 gimple_assign_rhs2 (assign), code);
940 break;
942 case NEGATE_EXPR:
943 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
944 return expand_vector_addition (gsi, do_unop, do_negate, type,
945 gimple_assign_rhs1 (assign),
946 NULL_TREE, code);
947 break;
949 case BIT_AND_EXPR:
950 case BIT_IOR_EXPR:
951 case BIT_XOR_EXPR:
952 return expand_vector_parallel (gsi, do_binop, type,
953 gimple_assign_rhs1 (assign),
954 gimple_assign_rhs2 (assign), code);
956 case BIT_NOT_EXPR:
957 return expand_vector_parallel (gsi, do_unop, type,
958 gimple_assign_rhs1 (assign),
959 NULL_TREE, code);
960 case EQ_EXPR:
961 case NE_EXPR:
962 case GT_EXPR:
963 case LT_EXPR:
964 case GE_EXPR:
965 case LE_EXPR:
966 case UNEQ_EXPR:
967 case UNGT_EXPR:
968 case UNLT_EXPR:
969 case UNGE_EXPR:
970 case UNLE_EXPR:
971 case LTGT_EXPR:
972 case ORDERED_EXPR:
973 case UNORDERED_EXPR:
975 tree rhs1 = gimple_assign_rhs1 (assign);
976 tree rhs2 = gimple_assign_rhs2 (assign);
978 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
981 case TRUNC_DIV_EXPR:
982 case TRUNC_MOD_EXPR:
984 tree rhs1 = gimple_assign_rhs1 (assign);
985 tree rhs2 = gimple_assign_rhs2 (assign);
986 tree ret;
988 if (!optimize
989 || !VECTOR_INTEGER_TYPE_P (type)
990 || TREE_CODE (rhs2) != VECTOR_CST
991 || !VECTOR_MODE_P (TYPE_MODE (type)))
992 break;
994 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
995 if (ret != NULL_TREE)
996 return ret;
997 break;
1000 default:
1001 break;
1004 if (TREE_CODE_CLASS (code) == tcc_unary)
1005 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1006 gimple_assign_rhs1 (assign),
1007 NULL_TREE, code);
1008 else
1009 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1010 gimple_assign_rhs1 (assign),
1011 gimple_assign_rhs2 (assign), code);
1014 /* Try to optimize
1015 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1016 style stmts into:
1017 _9 = { b_7, b_7, b_7, b_7 };
1018 a_5 = _9 + { 0, 3, 6, 9 };
1019 because a vector splat operation is usually more efficient
1020 than piecewise initialization of the vector. */
1022 static void
1023 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1025 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1026 tree lhs = gimple_assign_lhs (stmt);
1027 tree rhs = gimple_assign_rhs1 (stmt);
1028 tree type = TREE_TYPE (rhs);
1029 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1030 bool all_same = true;
1031 constructor_elt *elt;
1032 tree *cst;
1033 gimple g;
1034 tree base = NULL_TREE;
1035 optab op;
1037 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1038 return;
1039 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1040 if (op == unknown_optab
1041 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1042 return;
1043 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1044 if (TREE_CODE (elt->value) != SSA_NAME
1045 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1046 return;
1047 else
1049 tree this_base = elt->value;
1050 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1051 all_same = false;
1052 for (j = 0; j < nelts + 1; j++)
1054 g = SSA_NAME_DEF_STMT (this_base);
1055 if (is_gimple_assign (g)
1056 && gimple_assign_rhs_code (g) == PLUS_EXPR
1057 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1058 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1059 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1060 this_base = gimple_assign_rhs1 (g);
1061 else
1062 break;
1064 if (i == 0)
1065 base = this_base;
1066 else if (this_base != base)
1067 return;
1069 if (all_same)
1070 return;
1071 cst = XALLOCAVEC (tree, nelts);
1072 for (i = 0; i < nelts; i++)
1074 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1075 cst[i] = build_zero_cst (TREE_TYPE (base));
1076 while (this_base != base)
1078 g = SSA_NAME_DEF_STMT (this_base);
1079 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1080 cst[i], gimple_assign_rhs2 (g));
1081 if (cst[i] == NULL_TREE
1082 || TREE_CODE (cst[i]) != INTEGER_CST
1083 || TREE_OVERFLOW (cst[i]))
1084 return;
1085 this_base = gimple_assign_rhs1 (g);
1088 for (i = 0; i < nelts; i++)
1089 CONSTRUCTOR_ELT (rhs, i)->value = base;
1090 g = gimple_build_assign (make_ssa_name (type), rhs);
1091 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1092 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1093 build_vector (type, cst));
1094 gsi_replace (gsi, g, false);
1097 /* Return a type for the widest vector mode whose components are of type
1098 TYPE, or NULL_TREE if none is found. */
1100 static tree
1101 type_for_widest_vector_mode (tree type, optab op)
1103 machine_mode inner_mode = TYPE_MODE (type);
1104 machine_mode best_mode = VOIDmode, mode;
1105 int best_nunits = 0;
1107 if (SCALAR_FLOAT_MODE_P (inner_mode))
1108 mode = MIN_MODE_VECTOR_FLOAT;
1109 else if (SCALAR_FRACT_MODE_P (inner_mode))
1110 mode = MIN_MODE_VECTOR_FRACT;
1111 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1112 mode = MIN_MODE_VECTOR_UFRACT;
1113 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1114 mode = MIN_MODE_VECTOR_ACCUM;
1115 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1116 mode = MIN_MODE_VECTOR_UACCUM;
1117 else
1118 mode = MIN_MODE_VECTOR_INT;
1120 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1121 if (GET_MODE_INNER (mode) == inner_mode
1122 && GET_MODE_NUNITS (mode) > best_nunits
1123 && optab_handler (op, mode) != CODE_FOR_nothing)
1124 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1126 if (best_mode == VOIDmode)
1127 return NULL_TREE;
1128 else
1129 return build_vector_type_for_mode (type, best_mode);
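/* For instance, if TYPE is a 32-bit float type and the target implements OP
   for both a 4-unit and an 8-unit float vector mode, the loop above settles
   on the 8-unit mode, since among the supported candidates it has the
   largest GET_MODE_NUNITS.  */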
1133 /* Build a reference to the element of the vector VECT. The function
1134 returns either the element itself, a BIT_FIELD_REF, or an
1135 ARRAY_REF expression.
1137 GSI is required to insert temporary variables while building a
1138 reference to the element of the vector VECT.
1140 PTMPVEC is a pointer to the temporary variable used for caching
1141 purposes. If PTMPVEC is NULL, a new temporary variable
1142 will be created. */
1143 static tree
1144 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1146 tree vect_type, vect_elt_type;
1147 gimple asgn;
1148 tree tmpvec;
1149 tree arraytype;
1150 bool need_asgn = true;
1151 unsigned int elements;
1153 vect_type = TREE_TYPE (vect);
1154 vect_elt_type = TREE_TYPE (vect_type);
1155 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1157 if (TREE_CODE (idx) == INTEGER_CST)
1159 unsigned HOST_WIDE_INT index;
1161 /* Given that we're about to compute a binary modulus,
1162 we don't care about the high bits of the value. */
1163 index = TREE_INT_CST_LOW (idx);
1164 if (!tree_fits_uhwi_p (idx) || index >= elements)
1166 index &= elements - 1;
1167 idx = build_int_cst (TREE_TYPE (idx), index);
1170 /* When lowering a vector statement sequence, do some easy
1171 simplification by looking through intermediate vector results. */
1172 if (TREE_CODE (vect) == SSA_NAME)
1174 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1175 if (is_gimple_assign (def_stmt)
1176 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1177 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1178 vect = gimple_assign_rhs1 (def_stmt);
1181 if (TREE_CODE (vect) == VECTOR_CST)
1182 return VECTOR_CST_ELT (vect, index);
1183 else if (TREE_CODE (vect) == CONSTRUCTOR
1184 && (CONSTRUCTOR_NELTS (vect) == 0
1185 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1186 != VECTOR_TYPE))
1188 if (index < CONSTRUCTOR_NELTS (vect))
1189 return CONSTRUCTOR_ELT (vect, index)->value;
1190 return build_zero_cst (vect_elt_type);
1192 else
1194 tree size = TYPE_SIZE (vect_elt_type);
1195 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1196 size);
1197 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1201 if (!ptmpvec)
1202 tmpvec = create_tmp_var (vect_type, "vectmp");
1203 else if (!*ptmpvec)
1204 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1205 else
1207 tmpvec = *ptmpvec;
1208 need_asgn = false;
1211 if (need_asgn)
1213 TREE_ADDRESSABLE (tmpvec) = 1;
1214 asgn = gimple_build_assign (tmpvec, vect);
1215 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1218 arraytype = build_array_type_nelts (vect_elt_type, elements);
1219 return build4 (ARRAY_REF, vect_elt_type,
1220 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1221 idx, NULL_TREE, NULL_TREE);
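/* For example, extracting an element at a non-constant IDX cannot be folded
   above, so the vector is spilled to an addressable temporary and the
   result is the ARRAY_REF VIEW_CONVERT_EXPR<elt[N]>(vectmp)[IDX], where N
   is the number of vector elements.  */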
1224 /* Check if VEC_PERM_EXPR within the given setting is supported
1225 by hardware, or lower it piecewise.
1227 When VEC_PERM_EXPR has the same first and second operands:
1228 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1229 {v0[mask[0]], v0[mask[1]], ...}
1230 MASK and V0 must have the same number of elements.
1232 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1233 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1234 V0 and V1 must have the same type. MASK, V0, V1 must have the
1235 same number of elements. */
1237 static void
1238 lower_vec_perm (gimple_stmt_iterator *gsi)
1240 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1241 tree mask = gimple_assign_rhs3 (stmt);
1242 tree vec0 = gimple_assign_rhs1 (stmt);
1243 tree vec1 = gimple_assign_rhs2 (stmt);
1244 tree vect_type = TREE_TYPE (vec0);
1245 tree mask_type = TREE_TYPE (mask);
1246 tree vect_elt_type = TREE_TYPE (vect_type);
1247 tree mask_elt_type = TREE_TYPE (mask_type);
1248 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1249 vec<constructor_elt, va_gc> *v;
1250 tree constr, t, si, i_val;
1251 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1252 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1253 location_t loc = gimple_location (gsi_stmt (*gsi));
1254 unsigned i;
1256 if (TREE_CODE (mask) == SSA_NAME)
1258 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1259 if (is_gimple_assign (def_stmt)
1260 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1261 mask = gimple_assign_rhs1 (def_stmt);
1264 if (TREE_CODE (mask) == VECTOR_CST)
1266 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1268 for (i = 0; i < elements; ++i)
1269 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1270 & (2 * elements - 1));
1272 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1274 gimple_assign_set_rhs3 (stmt, mask);
1275 update_stmt (stmt);
1276 return;
1279 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1280 return;
1282 warning_at (loc, OPT_Wvector_operation_performance,
1283 "vector shuffling operation will be expanded piecewise");
1285 vec_alloc (v, elements);
1286 for (i = 0; i < elements; i++)
1288 si = size_int (i);
1289 i_val = vector_element (gsi, mask, si, &masktmp);
1291 if (TREE_CODE (i_val) == INTEGER_CST)
1293 unsigned HOST_WIDE_INT index;
1295 index = TREE_INT_CST_LOW (i_val);
1296 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1297 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1299 if (two_operand_p && (index & elements) != 0)
1300 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1301 else
1302 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1304 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1305 true, GSI_SAME_STMT);
1307 else
1309 tree cond = NULL_TREE, v0_val;
1311 if (two_operand_p)
1313 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1314 build_int_cst (mask_elt_type, elements));
1315 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1316 true, GSI_SAME_STMT);
1319 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1320 build_int_cst (mask_elt_type, elements - 1));
1321 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1322 true, GSI_SAME_STMT);
1324 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1325 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1326 true, GSI_SAME_STMT);
1328 if (two_operand_p)
1330 tree v1_val;
1332 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1333 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1334 true, GSI_SAME_STMT);
1336 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1337 cond, build_zero_cst (mask_elt_type));
1338 cond = fold_build3 (COND_EXPR, vect_elt_type,
1339 cond, v0_val, v1_val);
1340 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1341 true, GSI_SAME_STMT);
1343 else
1344 t = v0_val;
1347 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1350 constr = build_constructor (vect_type, v);
1351 gimple_assign_set_rhs_from_tree (gsi, constr);
1352 update_stmt (gsi_stmt (*gsi));
1355 /* Return the type in which the CODE operation with optab OP can be
1356 computed. */
1358 static tree
1359 get_compute_type (enum tree_code code, optab op, tree type)
1361 /* For very wide vectors, try using a smaller vector mode. */
1362 tree compute_type = type;
1363 if (op
1364 && (!VECTOR_MODE_P (TYPE_MODE (type))
1365 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1367 tree vector_compute_type
1368 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1369 if (vector_compute_type != NULL_TREE
1370 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1371 < TYPE_VECTOR_SUBPARTS (compute_type))
1372 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1373 != CODE_FOR_nothing))
1374 compute_type = vector_compute_type;
1377 /* If we are breaking a BLKmode vector into smaller pieces,
1378 type_for_widest_vector_mode has already looked into the optab,
1379 so skip these checks. */
1380 if (compute_type == type)
1382 machine_mode compute_mode = TYPE_MODE (compute_type);
1383 if (VECTOR_MODE_P (compute_mode))
1385 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1386 return compute_type;
1387 if (code == MULT_HIGHPART_EXPR
1388 && can_mult_highpart_p (compute_mode,
1389 TYPE_UNSIGNED (compute_type)))
1390 return compute_type;
1392 /* There is no operation in hardware, so fall back to scalars. */
1393 compute_type = TREE_TYPE (type);
1396 return compute_type;
1399 /* Helper function of expand_vector_operations_1. Return number of
1400 vector elements for vector types or 1 for other types. */
1402 static inline int
1403 count_type_subparts (tree type)
1405 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1408 static tree
1409 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1410 tree bitpos, tree bitsize, enum tree_code code)
1412 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1413 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1414 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1415 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1416 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1417 return gimplify_build3 (gsi, code, inner_type, cond, a, b);
1420 /* Expand a vector COND_EXPR to scalars, piecewise. */
1421 static void
1422 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1424 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1425 tree type = gimple_expr_type (stmt);
1426 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1427 machine_mode compute_mode = TYPE_MODE (compute_type);
1428 gcc_assert (compute_mode != BLKmode);
1429 tree lhs = gimple_assign_lhs (stmt);
1430 tree rhs2 = gimple_assign_rhs2 (stmt);
1431 tree rhs3 = gimple_assign_rhs3 (stmt);
1432 tree new_rhs;
1434 /* If the compute mode is not a vector mode (hence we are not decomposing
1435 a BLKmode vector to smaller, hardware-supported vectors), we may want
1436 to expand the operations in parallel. */
1437 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1438 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1439 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1440 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1441 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1442 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1443 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1444 COND_EXPR);
1445 else
1446 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1447 rhs2, rhs3, COND_EXPR);
1448 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1449 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1450 new_rhs);
1452 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1453 way to do it is to change expand_vector_operation and its callees to
1454 return a tree_code, RHS1 and RHS2 instead of a tree. */
1455 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1456 update_stmt (gsi_stmt (*gsi));
1459 /* Process one statement. If we identify a vector operation, expand it. */
1461 static void
1462 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1464 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1465 enum tree_code code;
1466 optab op = unknown_optab;
1467 enum gimple_rhs_class rhs_class;
1468 tree new_rhs;
1470 /* Only consider code == GIMPLE_ASSIGN. */
1471 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1472 if (!stmt)
1473 return;
1475 code = gimple_assign_rhs_code (stmt);
1476 rhs_class = get_gimple_rhs_class (code);
1477 lhs = gimple_assign_lhs (stmt);
1479 if (code == VEC_PERM_EXPR)
1481 lower_vec_perm (gsi);
1482 return;
1485 if (code == VEC_COND_EXPR)
1487 expand_vector_condition (gsi);
1488 return;
1491 if (code == COND_EXPR
1492 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1493 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1495 expand_vector_scalar_condition (gsi);
1496 return;
1499 if (code == CONSTRUCTOR
1500 && TREE_CODE (lhs) == SSA_NAME
1501 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1502 && !gimple_clobber_p (stmt)
1503 && optimize)
1505 optimize_vector_constructor (gsi);
1506 return;
1509 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1510 return;
1512 rhs1 = gimple_assign_rhs1 (stmt);
1513 type = gimple_expr_type (stmt);
1514 if (rhs_class == GIMPLE_BINARY_RHS)
1515 rhs2 = gimple_assign_rhs2 (stmt);
1517 if (TREE_CODE (type) != VECTOR_TYPE)
1518 return;
1520 if (CONVERT_EXPR_CODE_P (code)
1521 || code == FLOAT_EXPR
1522 || code == FIX_TRUNC_EXPR
1523 || code == VIEW_CONVERT_EXPR)
1524 return;
1526 /* The signedness is determined from the input argument. */
1527 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1528 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1529 type = TREE_TYPE (rhs1);
1531 /* For widening/narrowing vector operations, the relevant type is that of
1532 the arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1533 handled in the same way as above. */
1534 if (code == WIDEN_SUM_EXPR
1535 || code == VEC_WIDEN_MULT_HI_EXPR
1536 || code == VEC_WIDEN_MULT_LO_EXPR
1537 || code == VEC_WIDEN_MULT_EVEN_EXPR
1538 || code == VEC_WIDEN_MULT_ODD_EXPR
1539 || code == VEC_UNPACK_HI_EXPR
1540 || code == VEC_UNPACK_LO_EXPR
1541 || code == VEC_PACK_TRUNC_EXPR
1542 || code == VEC_PACK_SAT_EXPR
1543 || code == VEC_PACK_FIX_TRUNC_EXPR
1544 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1545 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1546 type = TREE_TYPE (rhs1);
1548 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1549 scalar. */
1550 if (code == LSHIFT_EXPR
1551 || code == RSHIFT_EXPR
1552 || code == LROTATE_EXPR
1553 || code == RROTATE_EXPR)
1555 optab opv;
1557 /* Check whether we have vector <op> {x,x,x,x} where x
1558 could be a scalar variable or a constant. Transform
1559 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1560 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1562 tree first;
1563 gimple def_stmt;
1565 if ((TREE_CODE (rhs2) == VECTOR_CST
1566 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1567 || (TREE_CODE (rhs2) == SSA_NAME
1568 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1569 && gimple_assign_single_p (def_stmt)
1570 && (first = uniform_vector_p
1571 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1573 gimple_assign_set_rhs2 (stmt, first);
1574 update_stmt (stmt);
1575 rhs2 = first;
1579 opv = optab_for_tree_code (code, type, optab_vector);
1580 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1581 op = opv;
1582 else
1584 op = optab_for_tree_code (code, type, optab_scalar);
1586 compute_type = get_compute_type (code, op, type);
1587 if (compute_type == type)
1588 return;
1589 /* The rtl expander will expand vector/scalar as vector/vector
1590 if necessary. Pick the one with the wider vector type. */
1591 tree compute_vtype = get_compute_type (code, opv, type);
1592 if (count_type_subparts (compute_vtype)
1593 > count_type_subparts (compute_type))
1595 compute_type = compute_vtype;
1596 op = opv;
1600 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1602 if (compute_type == NULL_TREE)
1603 compute_type = get_compute_type (code, op, type);
1604 if (compute_type == type)
1605 return;
1606 /* Before splitting vector rotates into scalar rotates,
1607 see if we can use vector shifts and BIT_IOR_EXPR
1608 instead. For vector-by-vector rotates we'd also
1609 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
1610 for now, as fold doesn't seem to create such rotates anyway. */
1611 if (compute_type == TREE_TYPE (type)
1612 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1614 optab oplv = vashl_optab, opl = ashl_optab;
1615 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1616 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1617 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1618 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1619 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1620 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1621 /* The rtl expander will expand vector/scalar as vector/vector
1622 if necessary. Pick the one with the wider vector type. */
1623 if (count_type_subparts (compute_lvtype)
1624 > count_type_subparts (compute_ltype))
1626 compute_ltype = compute_lvtype;
1627 opl = oplv;
1629 if (count_type_subparts (compute_rvtype)
1630 > count_type_subparts (compute_rtype))
1632 compute_rtype = compute_rvtype;
1633 opr = oprv;
1635 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1636 BIT_IOR_EXPR. */
1637 compute_type = compute_ltype;
1638 if (count_type_subparts (compute_type)
1639 > count_type_subparts (compute_rtype))
1640 compute_type = compute_rtype;
1641 if (count_type_subparts (compute_type)
1642 > count_type_subparts (compute_otype))
1643 compute_type = compute_otype;
1644 /* Verify all 3 operations can be performed in that type. */
1645 if (compute_type != TREE_TYPE (type))
1647 if (optab_handler (opl, TYPE_MODE (compute_type))
1648 == CODE_FOR_nothing
1649 || optab_handler (opr, TYPE_MODE (compute_type))
1650 == CODE_FOR_nothing
1651 || optab_handler (opo, TYPE_MODE (compute_type))
1652 == CODE_FOR_nothing)
1653 compute_type = TREE_TYPE (type);
1658 else
1659 op = optab_for_tree_code (code, type, optab_default);
1661 /* Optabs will try converting a negation into a subtraction, so
1662 look for it as well. TODO: negation of floating-point vectors
1663 might be turned into an exclusive OR toggling the sign bit. */
1664 if (op == unknown_optab
1665 && code == NEGATE_EXPR
1666 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1667 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1669 if (compute_type == NULL_TREE)
1670 compute_type = get_compute_type (code, op, type);
1671 if (compute_type == type)
1672 return;
1674 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1676 /* Leave expression untouched for later expansion. */
1677 if (new_rhs == NULL_TREE)
1678 return;
1680 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1681 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1682 new_rhs);
1684 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1685 way to do it is to change expand_vector_operation and its callees to
1686 return a tree_code, RHS1 and RHS2 instead of a tree. */
1687 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1688 update_stmt (gsi_stmt (*gsi));
1691 /* Use this to lower vector operations introduced by the vectorizer,
1692 if it may need the bit-twiddling tricks implemented in this file. */
1694 static unsigned int
1695 expand_vector_operations (void)
1697 gimple_stmt_iterator gsi;
1698 basic_block bb;
1699 bool cfg_changed = false;
1701 FOR_EACH_BB_FN (bb, cfun)
1703 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1705 expand_vector_operations_1 (&gsi);
1706 /* ??? If we do not clean up EH then we will ICE in
1707 verification. But in reality we have created wrong-code,
1708 as we did not properly transition EH info and edges to
1709 the piecewise computations. */
1710 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1711 && gimple_purge_dead_eh_edges (bb))
1712 cfg_changed = true;
1716 return cfg_changed ? TODO_cleanup_cfg : 0;
1719 namespace {
1721 const pass_data pass_data_lower_vector =
1723 GIMPLE_PASS, /* type */
1724 "veclower", /* name */
1725 OPTGROUP_VEC, /* optinfo_flags */
1726 TV_NONE, /* tv_id */
1727 PROP_cfg, /* properties_required */
1728 PROP_gimple_lvec, /* properties_provided */
1729 0, /* properties_destroyed */
1730 0, /* todo_flags_start */
1731 TODO_update_ssa, /* todo_flags_finish */
1734 class pass_lower_vector : public gimple_opt_pass
1736 public:
1737 pass_lower_vector (gcc::context *ctxt)
1738 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1741 /* opt_pass methods: */
1742 virtual bool gate (function *fun)
1744 return !(fun->curr_properties & PROP_gimple_lvec);
1747 virtual unsigned int execute (function *)
1749 return expand_vector_operations ();
1752 }; // class pass_lower_vector
1754 } // anon namespace
1756 gimple_opt_pass *
1757 make_pass_lower_vector (gcc::context *ctxt)
1759 return new pass_lower_vector (ctxt);
1762 namespace {
1764 const pass_data pass_data_lower_vector_ssa =
1766 GIMPLE_PASS, /* type */
1767 "veclower2", /* name */
1768 OPTGROUP_VEC, /* optinfo_flags */
1769 TV_NONE, /* tv_id */
1770 PROP_cfg, /* properties_required */
1771 PROP_gimple_lvec, /* properties_provided */
1772 0, /* properties_destroyed */
1773 0, /* todo_flags_start */
1774 ( TODO_update_ssa
1775 | TODO_cleanup_cfg ), /* todo_flags_finish */
1778 class pass_lower_vector_ssa : public gimple_opt_pass
1780 public:
1781 pass_lower_vector_ssa (gcc::context *ctxt)
1782 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1785 /* opt_pass methods: */
1786 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1787 virtual unsigned int execute (function *)
1789 return expand_vector_operations ();
1792 }; // class pass_lower_vector_ssa
1794 } // anon namespace
1796 gimple_opt_pass *
1797 make_pass_lower_vector_ssa (gcc::context *ctxt)
1799 return new pass_lower_vector_ssa (ctxt);
1802 #include "gt-tree-vect-generic.h"