[Ada] Unnesting: handle conditional expressions
[official-gcc.git] / gcc / tree-vect-generic.c
blob8389f5555e81cfab32b3a8e239379bf982d47fc0
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "recog.h" /* FIXME: for insn_data */
46 static void expand_vector_operations_1 (gimple_stmt_iterator *);
48 /* Return the number of elements in a vector type TYPE that we have
49 already decided needs to be expanded piecewise. We don't support
50 this kind of expansion for variable-length vectors, since we should
51 always check for target support before introducing uses of those. */
52 static unsigned int
53 nunits_for_known_piecewise_op (const_tree type)
55 return TYPE_VECTOR_SUBPARTS (type).to_constant ();
58 /* Return true if TYPE1 has more elements than TYPE2, where either
59 type may be a vector or a scalar. */
61 static inline bool
62 subparts_gt (tree type1, tree type2)
64 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
65 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
66 return known_gt (n1, n2);
69 /* Build a constant of type TYPE, made of VALUE's bits replicated
70 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
71 static tree
72 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
74 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
75 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76 / HOST_BITS_PER_WIDE_INT;
77 unsigned HOST_WIDE_INT low, mask;
78 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79 int i;
81 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
83 if (width == HOST_BITS_PER_WIDE_INT)
84 low = value;
85 else
87 mask = ((HOST_WIDE_INT)1 << width) - 1;
88 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
91 for (i = 0; i < n; i++)
92 a[i] = low;
94 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95 return wide_int_to_tree
96 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
103 /* Return a suitable vector types made of SUBPARTS units each of mode
104 "word_mode" (the global variable). */
105 static tree
106 build_word_mode_vector_type (int nunits)
108 if (!vector_inner_type)
109 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110 else if (vector_last_nunits == nunits)
112 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113 return vector_last_type;
116 vector_last_nunits = nunits;
117 vector_last_type = build_vector_type (vector_inner_type, nunits);
118 return vector_last_type;
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 tree, tree, tree, tree, tree, enum tree_code,
123 tree);
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 tree t, tree bitsize, tree bitpos)
129 if (TREE_CODE (t) == SSA_NAME)
131 gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132 if (is_gimple_assign (def_stmt)
133 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 || (bitpos
135 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 t = gimple_assign_rhs1 (def_stmt);
138 if (bitpos)
140 if (TREE_CODE (type) == BOOLEAN_TYPE)
142 tree itype
143 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
144 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
145 bitsize, bitpos);
146 return gimplify_build2 (gsi, NE_EXPR, type, field,
147 build_zero_cst (itype));
149 else
150 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
152 else
153 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
156 static tree
157 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
158 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
159 enum tree_code code, tree type ATTRIBUTE_UNUSED)
161 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
162 return gimplify_build1 (gsi, code, inner_type, a);
165 static tree
166 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
167 tree bitpos, tree bitsize, enum tree_code code,
168 tree type ATTRIBUTE_UNUSED)
170 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
171 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
172 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
173 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
174 return gimplify_build2 (gsi, code, inner_type, a, b);
177 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
179 INNER_TYPE is the type of A and B elements
181 returned expression is of signed integer type with the
182 size equal to the size of INNER_TYPE. */
183 static tree
184 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
185 tree bitpos, tree bitsize, enum tree_code code, tree type)
187 tree stype = TREE_TYPE (type);
188 tree cst_false = build_zero_cst (stype);
189 tree cst_true = build_all_ones_cst (stype);
190 tree cmp;
192 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
193 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
195 cmp = build2 (code, boolean_type_node, a, b);
196 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
199 /* Expand vector addition to scalars. This does bit twiddling
200 in order to increase parallelism:
202 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
203 (a ^ b) & 0x80808080
205 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
206 (a ^ ~b) & 0x80808080
208 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
210 This optimization should be done only if 4 vector items or more
211 fit into a word. */
212 static tree
213 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
214 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
215 enum tree_code code, tree type ATTRIBUTE_UNUSED)
217 tree inner_type = TREE_TYPE (TREE_TYPE (a));
218 unsigned HOST_WIDE_INT max;
219 tree low_bits, high_bits, a_low, b_low, result_low, signs;
221 max = GET_MODE_MASK (TYPE_MODE (inner_type));
222 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
223 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
225 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
226 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
228 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
229 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
230 if (code == PLUS_EXPR)
231 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
232 else
234 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
235 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
238 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
239 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
240 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
243 static tree
244 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
245 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
246 tree bitsize ATTRIBUTE_UNUSED,
247 enum tree_code code ATTRIBUTE_UNUSED,
248 tree type ATTRIBUTE_UNUSED)
250 tree inner_type = TREE_TYPE (TREE_TYPE (b));
251 HOST_WIDE_INT max;
252 tree low_bits, high_bits, b_low, result_low, signs;
254 max = GET_MODE_MASK (TYPE_MODE (inner_type));
255 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
256 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
258 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
260 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
261 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
262 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
263 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
264 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
267 /* Expand a vector operation to scalars, by using many operations
268 whose type is the vector type's inner type. */
269 static tree
270 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
271 tree type, tree inner_type,
272 tree a, tree b, enum tree_code code,
273 tree ret_type = NULL_TREE)
275 vec<constructor_elt, va_gc> *v;
276 tree part_width = TYPE_SIZE (inner_type);
277 tree index = bitsize_int (0);
278 int nunits = nunits_for_known_piecewise_op (type);
279 int delta = tree_to_uhwi (part_width)
280 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
281 int i;
282 location_t loc = gimple_location (gsi_stmt (*gsi));
284 if (ret_type
285 || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
286 warning_at (loc, OPT_Wvector_operation_performance,
287 "vector operation will be expanded piecewise");
288 else
289 warning_at (loc, OPT_Wvector_operation_performance,
290 "vector operation will be expanded in parallel");
292 if (!ret_type)
293 ret_type = type;
294 vec_alloc (v, (nunits + delta - 1) / delta);
295 for (i = 0; i < nunits;
296 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
298 tree result = f (gsi, inner_type, a, b, index, part_width, code,
299 ret_type);
300 constructor_elt ce = {NULL_TREE, result};
301 v->quick_push (ce);
304 return build_constructor (ret_type, v);
307 /* Expand a vector operation to scalars with the freedom to use
308 a scalar integer type, or to use a different size for the items
309 in the vector type. */
310 static tree
311 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
312 tree a, tree b, enum tree_code code)
314 tree result, compute_type;
315 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
316 location_t loc = gimple_location (gsi_stmt (*gsi));
318 /* We have three strategies. If the type is already correct, just do
319 the operation an element at a time. Else, if the vector is wider than
320 one word, do it a word at a time; finally, if the vector is smaller
321 than one word, do it as a scalar. */
322 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
323 return expand_vector_piecewise (gsi, f,
324 type, TREE_TYPE (type),
325 a, b, code);
326 else if (n_words > 1)
328 tree word_type = build_word_mode_vector_type (n_words);
329 result = expand_vector_piecewise (gsi, f,
330 word_type, TREE_TYPE (word_type),
331 a, b, code);
332 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
333 GSI_SAME_STMT);
335 else
337 /* Use a single scalar operation with a mode no wider than word_mode. */
338 scalar_int_mode mode
339 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
340 compute_type = lang_hooks.types.type_for_mode (mode, 1);
341 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
342 warning_at (loc, OPT_Wvector_operation_performance,
343 "vector operation will be expanded with a "
344 "single scalar operation");
347 return result;
350 /* Expand a vector operation to scalars; for integer types we can use
351 special bit twiddling tricks to do the sums a word at a time, using
352 function F_PARALLEL instead of F. These tricks are done only if
353 they can process at least four items, that is, only if the vector
354 holds at least four items and if a word can hold four items. */
355 static tree
356 expand_vector_addition (gimple_stmt_iterator *gsi,
357 elem_op_func f, elem_op_func f_parallel,
358 tree type, tree a, tree b, enum tree_code code)
360 int parts_per_word = UNITS_PER_WORD
361 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
363 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
364 && parts_per_word >= 4
365 && nunits_for_known_piecewise_op (type) >= 4)
366 return expand_vector_parallel (gsi, f_parallel,
367 type, a, b, code);
368 else
369 return expand_vector_piecewise (gsi, f,
370 type, TREE_TYPE (type),
371 a, b, code);
374 /* Try to expand vector comparison expression OP0 CODE OP1 by
375 querying optab if the following expression:
376 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
377 can be expanded. */
378 static tree
379 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
380 tree op1, enum tree_code code)
382 tree t;
383 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
384 && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
385 t = expand_vector_piecewise (gsi, do_compare, type,
386 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
387 else
388 t = NULL_TREE;
390 return t;
393 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
394 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
395 the result if successful, otherwise return NULL_TREE. */
396 static tree
397 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
399 optab op;
400 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
401 bool scalar_shift = true;
403 for (i = 1; i < nunits; i++)
405 if (shiftcnts[i] != shiftcnts[0])
406 scalar_shift = false;
409 if (scalar_shift && shiftcnts[0] == 0)
410 return op0;
412 if (scalar_shift)
414 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
415 if (op != unknown_optab
416 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
417 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
418 build_int_cst (NULL_TREE, shiftcnts[0]));
421 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
422 if (op != unknown_optab
423 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
425 tree_vector_builder vec (type, nunits, 1);
426 for (i = 0; i < nunits; i++)
427 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
428 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
431 return NULL_TREE;
434 /* Try to expand integer vector division by constant using
435 widening multiply, shifts and additions. */
436 static tree
437 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
438 tree op1, enum tree_code code)
440 bool use_pow2 = true;
441 bool has_vector_shift = true;
442 int mode = -1, this_mode;
443 int pre_shift = -1, post_shift;
444 unsigned int nunits = nunits_for_known_piecewise_op (type);
445 int *shifts = XALLOCAVEC (int, nunits * 4);
446 int *pre_shifts = shifts + nunits;
447 int *post_shifts = pre_shifts + nunits;
448 int *shift_temps = post_shifts + nunits;
449 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
450 int prec = TYPE_PRECISION (TREE_TYPE (type));
451 int dummy_int;
452 unsigned int i;
453 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
454 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
455 tree cur_op, mulcst, tem;
456 optab op;
458 if (prec > HOST_BITS_PER_WIDE_INT)
459 return NULL_TREE;
461 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
462 if (op == unknown_optab
463 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
464 has_vector_shift = false;
466 /* Analysis phase. Determine if all op1 elements are either power
467 of two and it is possible to expand it using shifts (or for remainder
468 using masking). Additionally compute the multiplicative constants
469 and pre and post shifts if the division is to be expanded using
470 widening or high part multiplication plus shifts. */
471 for (i = 0; i < nunits; i++)
473 tree cst = VECTOR_CST_ELT (op1, i);
474 unsigned HOST_WIDE_INT ml;
476 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
477 return NULL_TREE;
478 pre_shifts[i] = 0;
479 post_shifts[i] = 0;
480 mulc[i] = 0;
481 if (use_pow2
482 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
483 use_pow2 = false;
484 if (use_pow2)
486 shifts[i] = tree_log2 (cst);
487 if (shifts[i] != shifts[0]
488 && code == TRUNC_DIV_EXPR
489 && !has_vector_shift)
490 use_pow2 = false;
492 if (mode == -2)
493 continue;
494 if (sign_p == UNSIGNED)
496 unsigned HOST_WIDE_INT mh;
497 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
499 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
500 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
501 return NULL_TREE;
503 if (d <= 1)
505 mode = -2;
506 continue;
509 /* Find a suitable multiplier and right shift count
510 instead of multiplying with D. */
511 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
513 /* If the suggested multiplier is more than SIZE bits, we can
514 do better for even divisors, using an initial right shift. */
515 if ((mh != 0 && (d & 1) == 0)
516 || (!has_vector_shift && pre_shift != -1))
518 if (has_vector_shift)
519 pre_shift = ctz_or_zero (d);
520 else if (pre_shift == -1)
522 unsigned int j;
523 for (j = 0; j < nunits; j++)
525 tree cst2 = VECTOR_CST_ELT (op1, j);
526 unsigned HOST_WIDE_INT d2;
527 int this_pre_shift;
529 if (!tree_fits_uhwi_p (cst2))
530 return NULL_TREE;
531 d2 = tree_to_uhwi (cst2) & mask;
532 if (d2 == 0)
533 return NULL_TREE;
534 this_pre_shift = floor_log2 (d2 & -d2);
535 if (pre_shift == -1 || this_pre_shift < pre_shift)
536 pre_shift = this_pre_shift;
538 if (i != 0 && pre_shift != 0)
540 /* Restart. */
541 i = -1U;
542 mode = -1;
543 continue;
546 if (pre_shift != 0)
548 if ((d >> pre_shift) <= 1)
550 mode = -2;
551 continue;
553 mh = choose_multiplier (d >> pre_shift, prec,
554 prec - pre_shift,
555 &ml, &post_shift, &dummy_int);
556 gcc_assert (!mh);
557 pre_shifts[i] = pre_shift;
560 if (!mh)
561 this_mode = 0;
562 else
563 this_mode = 1;
565 else
567 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
568 unsigned HOST_WIDE_INT abs_d;
570 if (d == -1)
571 return NULL_TREE;
573 /* Since d might be INT_MIN, we have to cast to
574 unsigned HOST_WIDE_INT before negating to avoid
575 undefined signed overflow. */
576 abs_d = (d >= 0
577 ? (unsigned HOST_WIDE_INT) d
578 : - (unsigned HOST_WIDE_INT) d);
580 /* n rem d = n rem -d */
581 if (code == TRUNC_MOD_EXPR && d < 0)
582 d = abs_d;
583 else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
585 /* This case is not handled correctly below. */
586 mode = -2;
587 continue;
589 if (abs_d <= 1)
591 mode = -2;
592 continue;
595 choose_multiplier (abs_d, prec, prec - 1, &ml,
596 &post_shift, &dummy_int);
597 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
599 this_mode = 4 + (d < 0);
600 ml |= HOST_WIDE_INT_M1U << (prec - 1);
602 else
603 this_mode = 2 + (d < 0);
605 mulc[i] = ml;
606 post_shifts[i] = post_shift;
607 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
608 || post_shift >= prec
609 || pre_shifts[i] >= prec)
610 this_mode = -2;
612 if (i == 0)
613 mode = this_mode;
614 else if (mode != this_mode)
615 mode = -2;
618 if (use_pow2)
620 tree addend = NULL_TREE;
621 if (sign_p == SIGNED)
623 tree uns_type;
625 /* Both division and remainder sequences need
626 op0 < 0 ? mask : 0 computed. It can be either computed as
627 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
628 if none of the shifts is 0, or as the conditional. */
629 for (i = 0; i < nunits; i++)
630 if (shifts[i] == 0)
631 break;
632 uns_type
633 = build_vector_type (build_nonstandard_integer_type (prec, 1),
634 nunits);
635 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
637 for (i = 0; i < nunits; i++)
638 shift_temps[i] = prec - 1;
639 cur_op = add_rshift (gsi, type, op0, shift_temps);
640 if (cur_op != NULL_TREE)
642 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
643 uns_type, cur_op);
644 for (i = 0; i < nunits; i++)
645 shift_temps[i] = prec - shifts[i];
646 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
647 if (cur_op != NULL_TREE)
648 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
649 type, cur_op);
652 if (addend == NULL_TREE
653 && expand_vec_cond_expr_p (type, type, LT_EXPR))
655 tree zero, cst, cond, mask_type;
656 gimple *stmt;
658 mask_type = build_same_sized_truth_vector_type (type);
659 zero = build_zero_cst (type);
660 cond = build2 (LT_EXPR, mask_type, op0, zero);
661 tree_vector_builder vec (type, nunits, 1);
662 for (i = 0; i < nunits; i++)
663 vec.quick_push (build_int_cst (TREE_TYPE (type),
664 (HOST_WIDE_INT_1U
665 << shifts[i]) - 1));
666 cst = vec.build ();
667 addend = make_ssa_name (type);
668 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
669 cst, zero);
670 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
673 if (code == TRUNC_DIV_EXPR)
675 if (sign_p == UNSIGNED)
677 /* q = op0 >> shift; */
678 cur_op = add_rshift (gsi, type, op0, shifts);
679 if (cur_op != NULL_TREE)
680 return cur_op;
682 else if (addend != NULL_TREE)
684 /* t1 = op0 + addend;
685 q = t1 >> shift; */
686 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
687 if (op != unknown_optab
688 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
690 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
691 cur_op = add_rshift (gsi, type, cur_op, shifts);
692 if (cur_op != NULL_TREE)
693 return cur_op;
697 else
699 tree mask;
700 tree_vector_builder vec (type, nunits, 1);
701 for (i = 0; i < nunits; i++)
702 vec.quick_push (build_int_cst (TREE_TYPE (type),
703 (HOST_WIDE_INT_1U
704 << shifts[i]) - 1));
705 mask = vec.build ();
706 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
707 if (op != unknown_optab
708 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
710 if (sign_p == UNSIGNED)
711 /* r = op0 & mask; */
712 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
713 else if (addend != NULL_TREE)
715 /* t1 = op0 + addend;
716 t2 = t1 & mask;
717 r = t2 - addend; */
718 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
719 if (op != unknown_optab
720 && optab_handler (op, TYPE_MODE (type))
721 != CODE_FOR_nothing)
723 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
724 addend);
725 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
726 cur_op, mask);
727 op = optab_for_tree_code (MINUS_EXPR, type,
728 optab_default);
729 if (op != unknown_optab
730 && optab_handler (op, TYPE_MODE (type))
731 != CODE_FOR_nothing)
732 return gimplify_build2 (gsi, MINUS_EXPR, type,
733 cur_op, addend);
740 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
741 return NULL_TREE;
743 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
744 return NULL_TREE;
746 cur_op = op0;
748 switch (mode)
750 case 0:
751 gcc_assert (sign_p == UNSIGNED);
752 /* t1 = oprnd0 >> pre_shift;
753 t2 = t1 h* ml;
754 q = t2 >> post_shift; */
755 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
756 if (cur_op == NULL_TREE)
757 return NULL_TREE;
758 break;
759 case 1:
760 gcc_assert (sign_p == UNSIGNED);
761 for (i = 0; i < nunits; i++)
763 shift_temps[i] = 1;
764 post_shifts[i]--;
766 break;
767 case 2:
768 case 3:
769 case 4:
770 case 5:
771 gcc_assert (sign_p == SIGNED);
772 for (i = 0; i < nunits; i++)
773 shift_temps[i] = prec - 1;
774 break;
775 default:
776 return NULL_TREE;
779 tree_vector_builder vec (type, nunits, 1);
780 for (i = 0; i < nunits; i++)
781 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
782 mulcst = vec.build ();
784 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
786 switch (mode)
788 case 0:
789 /* t1 = oprnd0 >> pre_shift;
790 t2 = t1 h* ml;
791 q = t2 >> post_shift; */
792 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
793 break;
794 case 1:
795 /* t1 = oprnd0 h* ml;
796 t2 = oprnd0 - t1;
797 t3 = t2 >> 1;
798 t4 = t1 + t3;
799 q = t4 >> (post_shift - 1); */
800 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
801 if (op == unknown_optab
802 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
803 return NULL_TREE;
804 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
805 tem = add_rshift (gsi, type, tem, shift_temps);
806 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
807 if (op == unknown_optab
808 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
809 return NULL_TREE;
810 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
811 cur_op = add_rshift (gsi, type, tem, post_shifts);
812 if (cur_op == NULL_TREE)
813 return NULL_TREE;
814 break;
815 case 2:
816 case 3:
817 case 4:
818 case 5:
819 /* t1 = oprnd0 h* ml;
820 t2 = t1; [ iff (mode & 2) != 0 ]
821 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
822 t3 = t2 >> post_shift;
823 t4 = oprnd0 >> (prec - 1);
824 q = t3 - t4; [ iff (mode & 1) == 0 ]
825 q = t4 - t3; [ iff (mode & 1) != 0 ] */
826 if ((mode & 2) == 0)
828 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
829 if (op == unknown_optab
830 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
831 return NULL_TREE;
832 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
834 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
835 if (cur_op == NULL_TREE)
836 return NULL_TREE;
837 tem = add_rshift (gsi, type, op0, shift_temps);
838 if (tem == NULL_TREE)
839 return NULL_TREE;
840 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
841 if (op == unknown_optab
842 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
843 return NULL_TREE;
844 if ((mode & 1) == 0)
845 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
846 else
847 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
848 break;
849 default:
850 gcc_unreachable ();
853 if (code == TRUNC_DIV_EXPR)
854 return cur_op;
856 /* We divided. Now finish by:
857 t1 = q * oprnd1;
858 r = oprnd0 - t1; */
859 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
860 if (op == unknown_optab
861 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
862 return NULL_TREE;
863 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
864 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
865 if (op == unknown_optab
866 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
867 return NULL_TREE;
868 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
871 /* Expand a vector condition to scalars, by using many conditions
872 on the vector's elements. */
873 static void
874 expand_vector_condition (gimple_stmt_iterator *gsi)
876 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
877 tree type = gimple_expr_type (stmt);
878 tree a = gimple_assign_rhs1 (stmt);
879 tree a1 = a;
880 tree a2 = NULL_TREE;
881 bool a_is_comparison = false;
882 tree b = gimple_assign_rhs2 (stmt);
883 tree c = gimple_assign_rhs3 (stmt);
884 vec<constructor_elt, va_gc> *v;
885 tree constr;
886 tree inner_type = TREE_TYPE (type);
887 tree cond_type = TREE_TYPE (TREE_TYPE (a));
888 tree comp_inner_type = cond_type;
889 tree width = TYPE_SIZE (inner_type);
890 tree index = bitsize_int (0);
891 tree comp_width = width;
892 tree comp_index = index;
893 int i;
894 location_t loc = gimple_location (gsi_stmt (*gsi));
896 if (!is_gimple_val (a))
898 gcc_assert (COMPARISON_CLASS_P (a));
899 a_is_comparison = true;
900 a1 = TREE_OPERAND (a, 0);
901 a2 = TREE_OPERAND (a, 1);
902 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
903 comp_width = TYPE_SIZE (comp_inner_type);
906 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
907 return;
909 /* Handle vector boolean types with bitmasks. If there is a comparison
910 and we can expand the comparison into the vector boolean bitmask,
911 or otherwise if it is compatible with type, we can transform
912 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
913 into
914 tmp_6 = x_2 < y_3;
915 tmp_7 = tmp_6 & vbfld_4;
916 tmp_8 = ~tmp_6;
917 tmp_9 = tmp_8 & vbfld_5;
918 vbfld_1 = tmp_7 | tmp_9;
919 Similarly for vbfld_10 instead of x_2 < y_3. */
920 if (VECTOR_BOOLEAN_TYPE_P (type)
921 && SCALAR_INT_MODE_P (TYPE_MODE (type))
922 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
923 TYPE_VECTOR_SUBPARTS (type)
924 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
925 && (a_is_comparison
926 ? useless_type_conversion_p (type, TREE_TYPE (a))
927 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
929 if (a_is_comparison)
930 a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
931 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
932 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
933 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
934 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
935 gimple_assign_set_rhs_from_tree (gsi, a);
936 update_stmt (gsi_stmt (*gsi));
937 return;
940 /* TODO: try and find a smaller vector type. */
942 warning_at (loc, OPT_Wvector_operation_performance,
943 "vector condition will be expanded piecewise");
945 int nunits = nunits_for_known_piecewise_op (type);
946 vec_alloc (v, nunits);
947 for (i = 0; i < nunits; i++)
949 tree aa, result;
950 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
951 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
952 if (a_is_comparison)
954 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
955 comp_width, comp_index);
956 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
957 comp_width, comp_index);
958 aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
960 else
961 aa = tree_vec_extract (gsi, cond_type, a, width, index);
962 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
963 constructor_elt ce = {NULL_TREE, result};
964 v->quick_push (ce);
965 index = int_const_binop (PLUS_EXPR, index, width);
966 if (width == comp_width)
967 comp_index = index;
968 else
969 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
972 constr = build_constructor (type, v);
973 gimple_assign_set_rhs_from_tree (gsi, constr);
974 update_stmt (gsi_stmt (*gsi));
/* Expand the vector operation CODE of statement ASSIGN, producing a value
   of vector type TYPE, computed in pieces of type COMPUTE_TYPE.  Returns
   the tree to use as the replacement RHS.  When no vector mode is
   available, a handful of opcodes can be done word-parallel on integer
   registers; everything else falls back to elementwise expansion.  */

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* Word-parallel add/sub is only valid when a carry out of one
	   element cannot trap, i.e. for non-trapping integral types.  */
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      /* Bitwise operations are element-boundary-agnostic, so they can
	 always be done on the whole word at once.  */
      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  /* Division by a constant vector can sometimes be replaced by
	     cheaper operations, but only when optimizing and only when
	     the target has a vector mode for TYPE.  */
	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  /* Fall back to the piecewise expansion below.  */
	  break;
	}

      default:
	break;
      }

  /* Generic fallback: expand one element at a time.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j;
  unsigned HOST_WIDE_INT nelts;
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  /* Only handle fully-populated constructors of constant-length vectors
     with more than two elements.  */
  if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
      || nelts <= 2
      || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  /* The transformation needs a usable vector addition.  */
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  /* Check that every element is an SSA name reachable from a common BASE
     through a chain of "x = y + CST" assignments.  */
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	/* Walk the PLUS_EXPR chain upwards; the loop bound of NELTS + 1
	   steps keeps the walk from being unbounded on long chains.  */
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  /* A constructor of identical elements is already a splat; nothing to
     gain here.  */
  if (all_same)
    return;
  /* Re-walk each chain to accumulate the constant offset of each element
     relative to BASE.  Give up on any non-constant or overflowing sum.  */
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
			     elt, gimple_assign_rhs2 (g));
	  if (elt == NULL_TREE
	      || TREE_CODE (elt) != INTEGER_CST
	      || TREE_OVERFLOW (elt))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
      cst.quick_push (elt);
    }
  /* Rewrite the constructor into a splat of BASE ...  */
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  /* ... plus a constant-vector addition of the collected offsets.  */
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   cst.build ());
  gsi_replace (gsi, g, false);
}
1155 /* Return a type for the widest vector mode whose components are of type
1156 TYPE, or NULL_TREE if none is found. */
1158 static tree
1159 type_for_widest_vector_mode (tree type, optab op)
1161 machine_mode inner_mode = TYPE_MODE (type);
1162 machine_mode best_mode = VOIDmode, mode;
1163 poly_int64 best_nunits = 0;
1165 if (SCALAR_FLOAT_MODE_P (inner_mode))
1166 mode = MIN_MODE_VECTOR_FLOAT;
1167 else if (SCALAR_FRACT_MODE_P (inner_mode))
1168 mode = MIN_MODE_VECTOR_FRACT;
1169 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1170 mode = MIN_MODE_VECTOR_UFRACT;
1171 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1172 mode = MIN_MODE_VECTOR_ACCUM;
1173 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1174 mode = MIN_MODE_VECTOR_UACCUM;
1175 else if (inner_mode == BImode)
1176 mode = MIN_MODE_VECTOR_BOOL;
1177 else
1178 mode = MIN_MODE_VECTOR_INT;
1180 FOR_EACH_MODE_FROM (mode, mode)
1181 if (GET_MODE_INNER (mode) == inner_mode
1182 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1183 && optab_handler (op, mode) != CODE_FOR_nothing)
1184 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1186 if (best_mode == VOIDmode)
1187 return NULL_TREE;
1188 else
1189 return build_vector_type_for_mode (type, best_mode);
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = nunits_for_known_piecewise_op (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.
	 NOTE(review): the "& (elements - 1)" reduction presumes a
	 power-of-two element count — confirm against callers.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      /* Constant vector: just pull the element out.  */
      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      /* Constructor of scalars: return the matching initializer, or zero
	 for positions the (possibly short) constructor leaves implicit.  */
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
        {
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
          return build_zero_cst (vect_elt_type);
        }
      /* Otherwise extract the bits at a constant position.  */
      else
        {
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  /* Variable index: spill the vector to an addressable temporary (reusing
     the cached one from *PTMPVEC when available) and index it as an
     array.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned HOST_WIDE_INT elements;
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* Piecewise lowering needs a compile-time-constant element count.  */
  if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
    return;

  /* Look through an SSA name defined by a constant to expose a constant
     mask to the checks below.  */
  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      /* Constant permutation directly supported by the target: keep the
	 VEC_PERM_EXPR, just making the constant mask explicit.  */
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && maybe_ne (indices[0], 0)
	  && known_lt (poly_uint64 (indices[0]), elements))
	{
	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
	  if (!ok_p)
	    {
	      for (i = 1; i < elements; ++i)
		{
		  poly_uint64 actual = indices[i];
		  poly_uint64 expected = i + indices[0];
		  /* Indices into the second vector are all equivalent.  */
		  if (maybe_lt (actual, elements)
		      ? maybe_ne (actual, expected)
		      : maybe_lt (expected, elements))
		    break;
		}
	      ok_p = i == elements;
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
      /* And similarly vec_shl pattern.  */
      if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec0) == VECTOR_CST
	  && initializer_zerop (vec0))
	{
	  /* FIRST is the position at which indices switch from selecting
	     the zero vector (VEC0) to selecting VEC1 elements.  */
	  unsigned int first = 0;
	  for (i = 0; i < elements; ++i)
	    if (known_eq (poly_uint64 (indices[i]), elements))
	      {
		if (i == 0 || first)
		  break;
		first = i;
	      }
	    else if (first
		     ? maybe_ne (poly_uint64 (indices[i]),
				 elements + i - first)
		     : maybe_ge (poly_uint64 (indices[i]), elements))
	      break;
	  if (i == elements)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  /* Fallback: build a CONSTRUCTOR selecting each destination element
     one at a time.  */
  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  /* Canonicalize out-of-range constant indices; bit ELEMENTS of
	     the raw index picks between VEC0 and VEC1.  */
	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  /* Non-constant index: test bit ELEMENTS at run time to decide
	     which input vector the element comes from.  */
	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			          build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
1473 /* If OP is a uniform vector return the element it is a splat from. */
1475 static tree
1476 ssa_uniform_vector_p (tree op)
1478 if (TREE_CODE (op) == VECTOR_CST
1479 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1480 || TREE_CODE (op) == CONSTRUCTOR)
1481 return uniform_vector_p (op);
1482 if (TREE_CODE (op) == SSA_NAME)
1484 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1485 if (gimple_assign_single_p (def_stmt))
1486 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1488 return NULL_TREE;
1491 /* Return type in which CODE operation with optab OP can be
1492 computed. */
1494 static tree
1495 get_compute_type (enum tree_code code, optab op, tree type)
1497 /* For very wide vectors, try using a smaller vector mode. */
1498 tree compute_type = type;
1499 if (op
1500 && (!VECTOR_MODE_P (TYPE_MODE (type))
1501 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1503 tree vector_compute_type
1504 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1505 if (vector_compute_type != NULL_TREE
1506 && subparts_gt (compute_type, vector_compute_type)
1507 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1508 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1509 != CODE_FOR_nothing))
1510 compute_type = vector_compute_type;
1513 /* If we are breaking a BLKmode vector into smaller pieces,
1514 type_for_widest_vector_mode has already looked into the optab,
1515 so skip these checks. */
1516 if (compute_type == type)
1518 machine_mode compute_mode = TYPE_MODE (compute_type);
1519 if (VECTOR_MODE_P (compute_mode))
1521 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1522 return compute_type;
1523 if (code == MULT_HIGHPART_EXPR
1524 && can_mult_highpart_p (compute_mode,
1525 TYPE_UNSIGNED (compute_type)))
1526 return compute_type;
1528 /* There is no operation in hardware, so fall back to scalars. */
1529 compute_type = TREE_TYPE (type);
1532 return compute_type;
1535 static tree
1536 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1537 tree bitpos, tree bitsize, enum tree_code code,
1538 tree type ATTRIBUTE_UNUSED)
1540 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1541 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1542 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1543 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1544 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1545 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1548 /* Expand a vector COND_EXPR to scalars, piecewise. */
1549 static void
1550 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1552 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1553 tree type = gimple_expr_type (stmt);
1554 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1555 machine_mode compute_mode = TYPE_MODE (compute_type);
1556 gcc_assert (compute_mode != BLKmode);
1557 tree lhs = gimple_assign_lhs (stmt);
1558 tree rhs2 = gimple_assign_rhs2 (stmt);
1559 tree rhs3 = gimple_assign_rhs3 (stmt);
1560 tree new_rhs;
1562 /* If the compute mode is not a vector mode (hence we are not decomposing
1563 a BLKmode vector to smaller, hardware-supported vectors), we may want
1564 to expand the operations in parallel. */
1565 if (!VECTOR_MODE_P (compute_mode))
1566 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1567 COND_EXPR);
1568 else
1569 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1570 rhs2, rhs3, COND_EXPR);
1571 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1572 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1573 new_rhs);
1575 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1576 way to do it is change expand_vector_operation and its callees to
1577 return a tree_code, RHS1 and RHS2 instead of a tree. */
1578 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1579 update_stmt (gsi_stmt (*gsi));
1582 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1583 lowering. If INNER_TYPE is not a vector type, this is a scalar
1584 fallback. */
1586 static tree
1587 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1588 tree decl, tree bitpos, tree bitsize,
1589 enum tree_code code, tree type)
1591 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1592 if (!VECTOR_TYPE_P (inner_type))
1593 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1594 if (code == CALL_EXPR)
1596 gimple *g = gimple_build_call (decl, 1, a);
1597 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1598 gimple_call_set_lhs (g, lhs);
1599 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1600 return lhs;
1602 else
1604 tree outer_type = build_vector_type (TREE_TYPE (type),
1605 TYPE_VECTOR_SUBPARTS (inner_type));
1606 return gimplify_build1 (gsi, code, outer_type, a);
1610 /* Similarly, but for narrowing conversion. */
1612 static tree
1613 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1614 tree, tree bitpos, tree, enum tree_code code,
1615 tree type)
1617 tree itype = build_vector_type (TREE_TYPE (inner_type),
1618 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1619 2));
1620 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1621 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1622 int_const_binop (PLUS_EXPR, bitpos,
1623 TYPE_SIZE (itype)));
1624 tree outer_type = build_vector_type (TREE_TYPE (type),
1625 TYPE_VECTOR_SUBPARTS (inner_type));
1626 return gimplify_build2 (gsi, code, outer_type, b, c);
/* Expand VEC_CONVERT ifn call: lower a vector-to-vector conversion whose
   element sizes may differ.  Depending on whether the destination
   elements are narrower (NARROW), wider (WIDEN), or the same size (NONE)
   as the source elements, try the matching target support; otherwise
   fall back to an elementwise piecewise expansion.  */

static void
expand_vector_conversion (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  gimple *g;
  tree lhs = gimple_call_lhs (stmt);
  tree arg = gimple_call_arg (stmt, 0);
  tree decl = NULL_TREE;
  tree ret_type = TREE_TYPE (lhs);
  tree arg_type = TREE_TYPE (arg);
  tree new_rhs, compute_type = TREE_TYPE (arg_type);
  enum tree_code code = NOP_EXPR;
  enum tree_code code1 = ERROR_MARK;
  enum { NARROW, NONE, WIDEN } modifier = NONE;
  optab optab1 = unknown_optab;

  gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
  gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
  gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
  /* Classify the scalar conversion: int<->float get their own tree codes,
     anything else is treated as a NOP conversion.  */
  if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
      && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
    code = FIX_TRUNC_EXPR;
  else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
	   && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
    code = FLOAT_EXPR;
  /* Compare element bit sizes to decide between NARROW/NONE/WIDEN.  */
  if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
      < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
    modifier = NARROW;
  else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
	   > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
    modifier = WIDEN;

  if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
    {
      /* Same-width int<->float: first try a directly supported
	 conversion on the full vector.  */
      if (supportable_convert_operation (code, ret_type, arg_type, &decl,
					 &code1))
	{
	  if (code1 == CALL_EXPR)
	    {
	      g = gimple_build_call (decl, 1, arg);
	      gimple_call_set_lhs (g, lhs);
	    }
	  else
	    g = gimple_build_assign (lhs, code1, arg);
	  gsi_replace (gsi, g, false);
	  return;
	}
      /* Can't use get_compute_type here, as supportable_convert_operation
	 doesn't necessarily use an optab and needs two arguments.  */
      tree vec_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
      if (vec_compute_type
	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
	  && subparts_gt (arg_type, vec_compute_type))
	{
	  /* Try successively smaller subvector widths until one has a
	     supportable conversion, then expand piecewise at that width.  */
	  unsigned HOST_WIDE_INT nelts
	    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
	  while (nelts > 1)
	    {
	      tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
	      tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
	      if (supportable_convert_operation (code, ret1_type, arg1_type,
						 &decl, &code1))
		{
		  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
						     ret_type, arg1_type, arg,
						     decl, code1);
		  g = gimple_build_assign (lhs, new_rhs);
		  gsi_replace (gsi, g, false);
		  return;
		}
	      nelts = nelts / 2;
	    }
	}
    }
  else if (modifier == NARROW)
    {
      /* Map the scalar conversion onto the matching vector pack code.  */
      switch (code)
	{
	CASE_CONVERT:
	  code1 = VEC_PACK_TRUNC_EXPR;
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  break;
	case FIX_TRUNC_EXPR:
	  code1 = VEC_PACK_FIX_TRUNC_EXPR;
	  /* The signedness is determined from output operand.  */
	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
	  break;
	case FLOAT_EXPR:
	  code1 = VEC_PACK_FLOAT_EXPR;
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  break;
	default:
	  gcc_unreachable ();
	}

      if (optab1)
	compute_type = get_compute_type (code1, optab1, arg_type);
      enum insn_code icode1;
      if (VECTOR_TYPE_P (compute_type)
	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
	{
	  /* The pack insn produces a vector with twice as many (narrower)
	     elements as COMPUTE_TYPE.  */
	  tree cretd_type
	    = build_vector_type (TREE_TYPE (ret_type),
				 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
	    {
	      if (compute_type == arg_type)
		{
		  /* Single pack of ARG with a zero second operand; the
		     wanted result is the low half of the packed vector.  */
		  new_rhs = gimplify_build2 (gsi, code1, cretd_type,
					     arg, build_zero_cst (arg_type));
		  new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
					      TYPE_SIZE (ret_type),
					      bitsize_int (0));
		  g = gimple_build_assign (lhs, new_rhs);
		  gsi_replace (gsi, g, false);
		  return;
		}
	      tree dcompute_type
		= build_vector_type (TREE_TYPE (compute_type),
				     TYPE_VECTOR_SUBPARTS (compute_type) * 2);
	      if (TYPE_MAIN_VARIANT (dcompute_type)
		  == TYPE_MAIN_VARIANT (arg_type))
		new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
						    NULL_TREE, bitsize_int (0),
						    NULL_TREE, code1,
						    ret_type);
	      else
		new_rhs = expand_vector_piecewise (gsi,
						   do_vec_narrow_conversion,
						   arg_type, dcompute_type,
						   arg, NULL_TREE, code1,
						   ret_type);
	      g = gimple_build_assign (lhs, new_rhs);
	      gsi_replace (gsi, g, false);
	      return;
	    }
	}
    }
  else if (modifier == WIDEN)
    {
      /* Widening needs a LO/HI pair of unpack operations.  */
      enum tree_code code2 = ERROR_MARK;
      optab optab2 = unknown_optab;
      switch (code)
	{
	CASE_CONVERT:
	  code1 = VEC_UNPACK_LO_EXPR;
	  code2 = VEC_UNPACK_HI_EXPR;
	  break;
	case FIX_TRUNC_EXPR:
	  code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
	  code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
	  break;
	case FLOAT_EXPR:
	  code1 = VEC_UNPACK_FLOAT_LO_EXPR;
	  code2 = VEC_UNPACK_FLOAT_HI_EXPR;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (BYTES_BIG_ENDIAN)
	std::swap (code1, code2);

      if (code == FIX_TRUNC_EXPR)
	{
	  /* The signedness is determined from output operand.  */
	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
	  optab2 = optab_for_tree_code (code2, ret_type, optab_default);
	}
      else
	{
	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
	  optab2 = optab_for_tree_code (code2, arg_type, optab_default);
	}

      if (optab1 && optab2)
	compute_type = get_compute_type (code1, optab1, arg_type);

      enum insn_code icode1, icode2;
      if (VECTOR_TYPE_P (compute_type)
	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
	      != CODE_FOR_nothing)
	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
	  && (insn_data[icode1].operand[0].mode
	      == insn_data[icode2].operand[0].mode))
	{
	  /* Each unpack yields half as many (wider) elements.  */
	  poly_uint64 nunits
	    = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
	  tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
	    {
	      vec<constructor_elt, va_gc> *v;
	      tree part_width = TYPE_SIZE (compute_type);
	      tree index = bitsize_int (0);
	      int nunits = nunits_for_known_piecewise_op (arg_type);
	      int delta = tree_to_uhwi (part_width)
			  / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
	      int i;
	      location_t loc = gimple_location (gsi_stmt (*gsi));

	      if (compute_type != arg_type)
		warning_at (loc, OPT_Wvector_operation_performance,
			    "vector operation will be expanded piecewise");
	      else
		{
		  /* COMPUTE_TYPE covers the whole argument: one chunk.  */
		  nunits = 1;
		  delta = 1;
		}

	      /* Each chunk contributes a LO and a HI unpacked part.  */
	      vec_alloc (v, (nunits + delta - 1) / delta * 2);
	      for (i = 0; i < nunits;
		   i += delta, index = int_const_binop (PLUS_EXPR, index,
							part_width))
		{
		  tree a = arg;
		  if (compute_type != arg_type)
		    a = tree_vec_extract (gsi, compute_type, a, part_width,
					  index);
		  tree result = gimplify_build1 (gsi, code1, cretd_type, a);
		  constructor_elt ce = { NULL_TREE, result };
		  v->quick_push (ce);
		  ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
		  v->quick_push (ce);
		}

	      new_rhs = build_constructor (ret_type, v);
	      g = gimple_build_assign (lhs, new_rhs);
	      gsi_replace (gsi, g, false);
	      return;
	    }
	}
    }

  /* Last resort: scalarize the conversion completely.  */
  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
				     TREE_TYPE (arg_type), arg,
				     NULL_TREE, code, ret_type);
  g = gimple_build_assign (lhs, new_rhs);
  gsi_replace (gsi, g, false);
}
1875 /* Process one statement. If we identify a vector operation, expand it. */
1877 static void
1878 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1880 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1881 enum tree_code code;
1882 optab op = unknown_optab;
1883 enum gimple_rhs_class rhs_class;
1884 tree new_rhs;
1886 /* Only consider code == GIMPLE_ASSIGN. */
1887 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1888 if (!stmt)
1890 if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
1891 expand_vector_conversion (gsi);
1892 return;
1895 code = gimple_assign_rhs_code (stmt);
1896 rhs_class = get_gimple_rhs_class (code);
1897 lhs = gimple_assign_lhs (stmt);
1899 if (code == VEC_PERM_EXPR)
1901 lower_vec_perm (gsi);
1902 return;
1905 if (code == VEC_COND_EXPR)
1907 expand_vector_condition (gsi);
1908 return;
1911 if (code == COND_EXPR
1912 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1913 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1915 expand_vector_scalar_condition (gsi);
1916 return;
1919 if (code == CONSTRUCTOR
1920 && TREE_CODE (lhs) == SSA_NAME
1921 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1922 && !gimple_clobber_p (stmt)
1923 && optimize)
1925 optimize_vector_constructor (gsi);
1926 return;
1929 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1930 return;
1932 rhs1 = gimple_assign_rhs1 (stmt);
1933 type = gimple_expr_type (stmt);
1934 if (rhs_class == GIMPLE_BINARY_RHS)
1935 rhs2 = gimple_assign_rhs2 (stmt);
1937 if (!VECTOR_TYPE_P (type)
1938 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
1939 return;
1941 /* A scalar operation pretending to be a vector one. */
1942 if (VECTOR_BOOLEAN_TYPE_P (type)
1943 && !VECTOR_MODE_P (TYPE_MODE (type))
1944 && TYPE_MODE (type) != BLKmode)
1945 return;
1947 /* If the vector operation is operating on all same vector elements
1948 implement it with a scalar operation and a splat if the target
1949 supports the scalar operation. */
1950 tree srhs1, srhs2 = NULL_TREE;
1951 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
1952 && (rhs2 == NULL_TREE
1953 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
1954 && (srhs2 = rhs2))
1955 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1956 /* As we query direct optabs restrict to non-convert operations. */
1957 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
1959 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
1960 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
1961 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
1963 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
1964 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
1965 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
1966 gimple_assign_set_rhs_from_tree (gsi,
1967 build_vector_from_val (type, slhs));
1968 update_stmt (stmt);
1969 return;
1973 if (CONVERT_EXPR_CODE_P (code)
1974 || code == FLOAT_EXPR
1975 || code == FIX_TRUNC_EXPR
1976 || code == VIEW_CONVERT_EXPR)
1977 return;
1979 /* The signedness is determined from input argument. */
1980 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1981 || code == VEC_UNPACK_FLOAT_LO_EXPR
1982 || code == VEC_PACK_FLOAT_EXPR)
1984 /* We do not know how to scalarize those. */
1985 return;
1988 /* For widening/narrowing vector operations, the relevant type is of the
1989 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1990 calculated in the same way above. */
1991 if (code == WIDEN_SUM_EXPR
1992 || code == VEC_WIDEN_MULT_HI_EXPR
1993 || code == VEC_WIDEN_MULT_LO_EXPR
1994 || code == VEC_WIDEN_MULT_EVEN_EXPR
1995 || code == VEC_WIDEN_MULT_ODD_EXPR
1996 || code == VEC_UNPACK_HI_EXPR
1997 || code == VEC_UNPACK_LO_EXPR
1998 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
1999 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2000 || code == VEC_PACK_TRUNC_EXPR
2001 || code == VEC_PACK_SAT_EXPR
2002 || code == VEC_PACK_FIX_TRUNC_EXPR
2003 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2004 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2006 /* We do not know how to scalarize those. */
2007 return;
2010 /* Choose between vector shift/rotate by vector and vector shift/rotate by
2011 scalar */
2012 if (code == LSHIFT_EXPR
2013 || code == RSHIFT_EXPR
2014 || code == LROTATE_EXPR
2015 || code == RROTATE_EXPR)
2017 optab opv;
2019 /* Check whether we have vector <op> {x,x,x,x} where x
2020 could be a scalar variable or a constant. Transform
2021 vector <op> {x,x,x,x} ==> vector <op> scalar. */
2022 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2024 tree first;
2026 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2028 gimple_assign_set_rhs2 (stmt, first);
2029 update_stmt (stmt);
2030 rhs2 = first;
2034 opv = optab_for_tree_code (code, type, optab_vector);
2035 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2036 op = opv;
2037 else
2039 op = optab_for_tree_code (code, type, optab_scalar);
2041 compute_type = get_compute_type (code, op, type);
2042 if (compute_type == type)
2043 return;
2044 /* The rtl expander will expand vector/scalar as vector/vector
2045 if necessary. Pick one with wider vector type. */
2046 tree compute_vtype = get_compute_type (code, opv, type);
2047 if (subparts_gt (compute_vtype, compute_type))
2049 compute_type = compute_vtype;
2050 op = opv;
2054 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2056 if (compute_type == NULL_TREE)
2057 compute_type = get_compute_type (code, op, type);
2058 if (compute_type == type)
2059 return;
2060 /* Before splitting vector rotates into scalar rotates,
2061 see if we can't use vector shifts and BIT_IOR_EXPR
2062 instead. For vector by vector rotates we'd also
2063 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
2064 for now, fold doesn't seem to create such rotates anyway. */
2065 if (compute_type == TREE_TYPE (type)
2066 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2068 optab oplv = vashl_optab, opl = ashl_optab;
2069 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2070 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2071 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2072 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2073 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2074 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2075 /* The rtl expander will expand vector/scalar as vector/vector
2076 if necessary. Pick one with wider vector type. */
2077 if (subparts_gt (compute_lvtype, compute_ltype))
2079 compute_ltype = compute_lvtype;
2080 opl = oplv;
2082 if (subparts_gt (compute_rvtype, compute_rtype))
2084 compute_rtype = compute_rvtype;
2085 opr = oprv;
2087 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2088 BIT_IOR_EXPR. */
2089 compute_type = compute_ltype;
2090 if (subparts_gt (compute_type, compute_rtype))
2091 compute_type = compute_rtype;
2092 if (subparts_gt (compute_type, compute_otype))
2093 compute_type = compute_otype;
2094 /* Verify all 3 operations can be performed in that type. */
2095 if (compute_type != TREE_TYPE (type))
2097 if (optab_handler (opl, TYPE_MODE (compute_type))
2098 == CODE_FOR_nothing
2099 || optab_handler (opr, TYPE_MODE (compute_type))
2100 == CODE_FOR_nothing
2101 || optab_handler (opo, TYPE_MODE (compute_type))
2102 == CODE_FOR_nothing)
2103 compute_type = TREE_TYPE (type);
2108 else
2109 op = optab_for_tree_code (code, type, optab_default);
2111 /* Optabs will try converting a negation into a subtraction, so
2112 look for it as well. TODO: negation of floating-point vectors
2113 might be turned into an exclusive OR toggling the sign bit. */
2114 if (op == unknown_optab
2115 && code == NEGATE_EXPR
2116 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2117 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2119 if (compute_type == NULL_TREE)
2120 compute_type = get_compute_type (code, op, type);
2121 if (compute_type == type)
2122 return;
2124 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
2126 /* Leave expression untouched for later expansion. */
2127 if (new_rhs == NULL_TREE)
2128 return;
2130 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2131 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2132 new_rhs);
2134 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2135 way to do it is change expand_vector_operation and its callees to
2136 return a tree_code, RHS1 and RHS2 instead of a tree. */
2137 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2138 update_stmt (gsi_stmt (*gsi));
2141 /* Use this to lower vector operations introduced by the vectorizer,
2142 if it may need the bit-twiddling tricks implemented in this file. */
2144 static unsigned int
2145 expand_vector_operations (void)
2147 gimple_stmt_iterator gsi;
2148 basic_block bb;
2149 bool cfg_changed = false;
2151 FOR_EACH_BB_FN (bb, cfun)
2153 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2155 expand_vector_operations_1 (&gsi);
2156 /* ??? If we do not cleanup EH then we will ICE in
2157 verification. But in reality we have created wrong-code
2158 as we did not properly transition EH info and edges to
2159 the piecewise computations. */
2160 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2161 && gimple_purge_dead_eh_edges (bb))
2162 cfg_changed = true;
2166 return cfg_changed ? TODO_cleanup_cfg : 0;
2169 namespace {
2171 const pass_data pass_data_lower_vector =
2173 GIMPLE_PASS, /* type */
2174 "veclower", /* name */
2175 OPTGROUP_VEC, /* optinfo_flags */
2176 TV_NONE, /* tv_id */
2177 PROP_cfg, /* properties_required */
2178 PROP_gimple_lvec, /* properties_provided */
2179 0, /* properties_destroyed */
2180 0, /* todo_flags_start */
2181 TODO_update_ssa, /* todo_flags_finish */
2184 class pass_lower_vector : public gimple_opt_pass
2186 public:
2187 pass_lower_vector (gcc::context *ctxt)
2188 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2191 /* opt_pass methods: */
2192 virtual bool gate (function *fun)
2194 return !(fun->curr_properties & PROP_gimple_lvec);
2197 virtual unsigned int execute (function *)
2199 return expand_vector_operations ();
2202 }; // class pass_lower_vector
2204 } // anon namespace
2206 gimple_opt_pass *
2207 make_pass_lower_vector (gcc::context *ctxt)
2209 return new pass_lower_vector (ctxt);
2212 namespace {
2214 const pass_data pass_data_lower_vector_ssa =
2216 GIMPLE_PASS, /* type */
2217 "veclower2", /* name */
2218 OPTGROUP_VEC, /* optinfo_flags */
2219 TV_NONE, /* tv_id */
2220 PROP_cfg, /* properties_required */
2221 PROP_gimple_lvec, /* properties_provided */
2222 0, /* properties_destroyed */
2223 0, /* todo_flags_start */
2224 ( TODO_update_ssa
2225 | TODO_cleanup_cfg ), /* todo_flags_finish */
2228 class pass_lower_vector_ssa : public gimple_opt_pass
2230 public:
2231 pass_lower_vector_ssa (gcc::context *ctxt)
2232 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2235 /* opt_pass methods: */
2236 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2237 virtual unsigned int execute (function *)
2239 return expand_vector_operations ();
2242 }; // class pass_lower_vector_ssa
2244 } // anon namespace
2246 gimple_opt_pass *
2247 make_pass_lower_vector_ssa (gcc::context *ctxt)
2249 return new pass_lower_vector_ssa (ctxt);
2252 #include "gt-tree-vect-generic.h"