gcc/tree-vect-generic.c
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "recog.h" /* FIXME: for insn_data */
46 static void expand_vector_operations_1 (gimple_stmt_iterator *);
48 /* Return the number of elements in a vector type TYPE that we have
49 already decided needs to be expanded piecewise. We don't support
50 this kind of expansion for variable-length vectors, since we should
51 always check for target support before introducing uses of those. */
52 static unsigned int
53 nunits_for_known_piecewise_op (const_tree type)
55 return TYPE_VECTOR_SUBPARTS (type).to_constant ();
58 /* Return true if TYPE1 has more elements than TYPE2, where either
59 type may be a vector or a scalar. */
61 static inline bool
62 subparts_gt (tree type1, tree type2)
64 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
65 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
66 return known_gt (n1, n2);
69 /* Build a constant of type TYPE, made of VALUE's bits replicated
70 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
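/* Editor's illustration (assumed widths, not from the original source):
   with an 8-bit INNER_TYPE and a 32-bit TYPE, VALUE 0x7f becomes the
   constant 0x7f7f7f7f and VALUE 0x80 becomes 0x80808080.  These are
   exactly the low-bit and high-bit masks that do_plus_minus and
   do_negate below build from GET_MODE_MASK of the element mode.  */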
71 static tree
72 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
74 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
75 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76 / HOST_BITS_PER_WIDE_INT;
77 unsigned HOST_WIDE_INT low, mask;
78 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79 int i;
81 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
83 if (width == HOST_BITS_PER_WIDE_INT)
84 low = value;
85 else
87 mask = ((HOST_WIDE_INT)1 << width) - 1;
88 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
91 for (i = 0; i < n; i++)
92 a[i] = low;
94 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95 return wide_int_to_tree
96 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
103 /* Return a suitable vector type made of NUNITS units each of mode
104 "word_mode" (the global variable). */
105 static tree
106 build_word_mode_vector_type (int nunits)
108 if (!vector_inner_type)
109 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110 else if (vector_last_nunits == nunits)
112 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113 return vector_last_type;
116 vector_last_nunits = nunits;
117 vector_last_type = build_vector_type (vector_inner_type, nunits);
118 return vector_last_type;
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 tree, tree, tree, tree, tree, enum tree_code,
123 tree);
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 tree t, tree bitsize, tree bitpos)
129 if (TREE_CODE (t) == SSA_NAME)
131 gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132 if (is_gimple_assign (def_stmt)
133 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 || (bitpos
135 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 t = gimple_assign_rhs1 (def_stmt);
138 if (bitpos)
140 if (TREE_CODE (type) == BOOLEAN_TYPE)
142 tree itype
143 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
144 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
145 bitsize, bitpos);
146 return gimplify_build2 (gsi, NE_EXPR, type, field,
147 build_zero_cst (itype));
149 else
150 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
152 else
153 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
156 static tree
157 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
158 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
159 enum tree_code code, tree type ATTRIBUTE_UNUSED)
161 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
162 return gimplify_build1 (gsi, code, inner_type, a);
165 static tree
166 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
167 tree bitpos, tree bitsize, enum tree_code code,
168 tree type ATTRIBUTE_UNUSED)
170 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
171 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
172 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
173 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
174 return gimplify_build2 (gsi, code, inner_type, a, b);
177 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
179 INNER_TYPE is the type of the elements of A and B.  The
181 returned expression is of signed integer type with the
182 size equal to the size of INNER_TYPE. */
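/* Editor's note: for example, with 16-bit elements and LT_EXPR this
   yields 0xffff for an element where a < b and 0x0000 otherwise, since
   cst_true is built with build_all_ones_cst and cst_false with
   build_zero_cst.  */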
183 static tree
184 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
185 tree bitpos, tree bitsize, enum tree_code code, tree type)
187 tree stype = TREE_TYPE (type);
188 tree cst_false = build_zero_cst (stype);
189 tree cst_true = build_all_ones_cst (stype);
190 tree cmp;
192 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
193 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
195 cmp = build2 (code, boolean_type_node, a, b);
196 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
199 /* Expand vector addition to scalars. This does bit twiddling
200 in order to increase parallelism:
202 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
203 (a ^ b) & 0x80808080
205 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
206 (a ^ ~b) & 0x80808080
208 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
210 This optimization should be done only if 4 vector items or more
211 fit into a word. */
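/* Editor's worked example (assuming 8-bit elements packed in a 32-bit
   word): for a = 0x01020304 and b = 0x01010101,
     low_bits  = 0x7f7f7f7f,  high_bits = 0x80808080,
     (a & low_bits) + (b & low_bits) = 0x02030405,
     (a ^ b) & high_bits             = 0x00000000,
   so a + b = 0x02030405.  Masking the top bit of every element before
   the word-wide addition keeps carries from crossing element
   boundaries; the XOR term then restores the correct top bits.  */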
212 static tree
213 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
214 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
215 enum tree_code code, tree type ATTRIBUTE_UNUSED)
217 tree inner_type = TREE_TYPE (TREE_TYPE (a));
218 unsigned HOST_WIDE_INT max;
219 tree low_bits, high_bits, a_low, b_low, result_low, signs;
221 max = GET_MODE_MASK (TYPE_MODE (inner_type));
222 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
223 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
225 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
226 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
228 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
229 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
230 if (code == PLUS_EXPR)
231 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
232 else
234 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
235 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
238 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
239 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
240 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
243 static tree
244 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
245 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
246 tree bitsize ATTRIBUTE_UNUSED,
247 enum tree_code code ATTRIBUTE_UNUSED,
248 tree type ATTRIBUTE_UNUSED)
250 tree inner_type = TREE_TYPE (TREE_TYPE (b));
251 HOST_WIDE_INT max;
252 tree low_bits, high_bits, b_low, result_low, signs;
254 max = GET_MODE_MASK (TYPE_MODE (inner_type));
255 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
256 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
258 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
260 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
261 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
262 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
263 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
264 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
267 /* Expand a vector operation to scalars, by using many operations
268 whose type is the vector type's inner type. */
269 static tree
270 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
271 tree type, tree inner_type,
272 tree a, tree b, enum tree_code code,
273 tree ret_type = NULL_TREE)
275 vec<constructor_elt, va_gc> *v;
276 tree part_width = TYPE_SIZE (inner_type);
277 tree index = bitsize_int (0);
278 int nunits = nunits_for_known_piecewise_op (type);
279 int delta = tree_to_uhwi (part_width)
280 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
281 int i;
282 location_t loc = gimple_location (gsi_stmt (*gsi));
284 if (ret_type
285 || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
286 warning_at (loc, OPT_Wvector_operation_performance,
287 "vector operation will be expanded piecewise");
288 else
289 warning_at (loc, OPT_Wvector_operation_performance,
290 "vector operation will be expanded in parallel");
292 if (!ret_type)
293 ret_type = type;
294 vec_alloc (v, (nunits + delta - 1) / delta);
295 for (i = 0; i < nunits;
296 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
298 tree result = f (gsi, inner_type, a, b, index, part_width, code,
299 ret_type);
300 constructor_elt ce = {NULL_TREE, result};
301 v->quick_push (ce);
304 return build_constructor (ret_type, v);
307 /* Expand a vector operation to scalars with the freedom to use
308 a scalar integer type, or to use a different size for the items
309 in the vector type. */
310 static tree
311 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
312 tree a, tree b, enum tree_code code)
314 tree result, compute_type;
315 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
316 location_t loc = gimple_location (gsi_stmt (*gsi));
318 /* We have three strategies. If the type is already correct, just do
319 the operation an element at a time. Else, if the vector is wider than
320 one word, do it a word at a time; finally, if the vector is smaller
321 than one word, do it as a scalar. */
322 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
323 return expand_vector_piecewise (gsi, f,
324 type, TREE_TYPE (type),
325 a, b, code);
326 else if (n_words > 1)
328 tree word_type = build_word_mode_vector_type (n_words);
329 result = expand_vector_piecewise (gsi, f,
330 word_type, TREE_TYPE (word_type),
331 a, b, code);
332 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
333 GSI_SAME_STMT);
335 else
337 /* Use a single scalar operation with a mode no wider than word_mode. */
338 scalar_int_mode mode
339 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
340 compute_type = lang_hooks.types.type_for_mode (mode, 1);
341 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
342 warning_at (loc, OPT_Wvector_operation_performance,
343 "vector operation will be expanded with a "
344 "single scalar operation");
347 return result;
350 /* Expand a vector operation to scalars; for integer types we can use
351 special bit twiddling tricks to do the sums a word at a time, using
352 function F_PARALLEL instead of F. These tricks are done only if
353 they can process at least four items, that is, only if the vector
354 holds at least four items and if a word can hold four items. */
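/* Editor's note: on a target with a 64-bit word, a V8QI addition
   satisfies both tests (8 parts per word and 8 elements), so
   expand_vector_parallel is used; a V2SI addition has only 2 parts
   per word and falls back to expand_vector_piecewise.  */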
355 static tree
356 expand_vector_addition (gimple_stmt_iterator *gsi,
357 elem_op_func f, elem_op_func f_parallel,
358 tree type, tree a, tree b, enum tree_code code)
360 int parts_per_word = UNITS_PER_WORD
361 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
363 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
364 && parts_per_word >= 4
365 && nunits_for_known_piecewise_op (type) >= 4)
366 return expand_vector_parallel (gsi, f_parallel,
367 type, a, b, code);
368 else
369 return expand_vector_piecewise (gsi, f,
370 type, TREE_TYPE (type),
371 a, b, code);
374 /* Try to expand the vector comparison expression OP0 CODE OP1 by
375 querying the optab to check whether the expression
376 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
377 can be expanded. */
378 static tree
379 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
380 tree op1, enum tree_code code)
382 tree t;
383 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
384 && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
386 if (VECTOR_BOOLEAN_TYPE_P (type)
387 && SCALAR_INT_MODE_P (TYPE_MODE (type))
388 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
389 TYPE_VECTOR_SUBPARTS (type)
390 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
391 (TREE_TYPE (type)))))
393 tree inner_type = TREE_TYPE (TREE_TYPE (op0));
394 tree part_width = TYPE_SIZE (inner_type);
395 tree index = bitsize_int (0);
396 int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
397 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
398 tree ret_type = build_nonstandard_integer_type (prec, 1);
399 tree ret_inner_type = boolean_type_node;
400 int i;
401 location_t loc = gimple_location (gsi_stmt (*gsi));
402 t = build_zero_cst (ret_type);
404 if (TYPE_PRECISION (ret_inner_type) != 1)
405 ret_inner_type = build_nonstandard_integer_type (1, 1);
406 warning_at (loc, OPT_Wvector_operation_performance,
407 "vector operation will be expanded piecewise");
408 for (i = 0; i < nunits;
409 i++, index = int_const_binop (PLUS_EXPR, index, part_width))
411 tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
412 index);
413 tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
414 index);
415 tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
416 t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
417 bitsize_int (i));
419 t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
421 else
422 t = expand_vector_piecewise (gsi, do_compare, type,
423 TREE_TYPE (TREE_TYPE (op0)), op0, op1,
424 code);
426 else
427 t = NULL_TREE;
429 return t;
432 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
433 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
434 the result if successful, otherwise return NULL_TREE. */
435 static tree
436 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
438 optab op;
439 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
440 bool scalar_shift = true;
442 for (i = 1; i < nunits; i++)
444 if (shiftcnts[i] != shiftcnts[0])
445 scalar_shift = false;
448 if (scalar_shift && shiftcnts[0] == 0)
449 return op0;
451 if (scalar_shift)
453 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
454 if (op != unknown_optab
455 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
456 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
457 build_int_cst (NULL_TREE, shiftcnts[0]));
460 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
461 if (op != unknown_optab
462 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
464 tree_vector_builder vec (type, nunits, 1);
465 for (i = 0; i < nunits; i++)
466 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
467 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
470 return NULL_TREE;
473 /* Try to expand integer vector division by constant using
474 widening multiply, shifts and additions. */
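/* Editor's illustration (assumed constants, not taken from the source):
   for a 32-bit unsigned element divided by 3, choose_multiplier gives
   the multiplier 0xAAAAAAAB with a post-shift of 1, i.e.
     q = (unsigned) (((unsigned long long) x * 0xAAAAAAAB) >> 32) >> 1;
   which the code below emits as a MULT_HIGHPART_EXPR followed by a
   vector right shift.  */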
475 static tree
476 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
477 tree op1, enum tree_code code)
479 bool use_pow2 = true;
480 bool has_vector_shift = true;
481 int mode = -1, this_mode;
482 int pre_shift = -1, post_shift;
483 unsigned int nunits = nunits_for_known_piecewise_op (type);
484 int *shifts = XALLOCAVEC (int, nunits * 4);
485 int *pre_shifts = shifts + nunits;
486 int *post_shifts = pre_shifts + nunits;
487 int *shift_temps = post_shifts + nunits;
488 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
489 int prec = TYPE_PRECISION (TREE_TYPE (type));
490 int dummy_int;
491 unsigned int i;
492 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
493 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
494 tree cur_op, mulcst, tem;
495 optab op;
497 if (prec > HOST_BITS_PER_WIDE_INT)
498 return NULL_TREE;
500 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
501 if (op == unknown_optab
502 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
503 has_vector_shift = false;
505 /* Analysis phase. Determine whether all op1 elements are powers
506 of two, in which case the operation can be expanded using shifts (or,
507 for the remainder, using masking). Additionally compute the
508 multiplicative constants and pre- and post-shifts if the division is
509 to be expanded using widening or high part multiplication plus shifts. */
510 for (i = 0; i < nunits; i++)
512 tree cst = VECTOR_CST_ELT (op1, i);
513 unsigned HOST_WIDE_INT ml;
515 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
516 return NULL_TREE;
517 pre_shifts[i] = 0;
518 post_shifts[i] = 0;
519 mulc[i] = 0;
520 if (use_pow2
521 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
522 use_pow2 = false;
523 if (use_pow2)
525 shifts[i] = tree_log2 (cst);
526 if (shifts[i] != shifts[0]
527 && code == TRUNC_DIV_EXPR
528 && !has_vector_shift)
529 use_pow2 = false;
531 if (mode == -2)
532 continue;
533 if (sign_p == UNSIGNED)
535 unsigned HOST_WIDE_INT mh;
536 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
538 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
539 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
540 return NULL_TREE;
542 if (d <= 1)
544 mode = -2;
545 continue;
548 /* Find a suitable multiplier and right shift count
549 instead of multiplying with D. */
550 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
552 /* If the suggested multiplier is more than SIZE bits, we can
553 do better for even divisors, using an initial right shift. */
554 if ((mh != 0 && (d & 1) == 0)
555 || (!has_vector_shift && pre_shift != -1))
557 if (has_vector_shift)
558 pre_shift = ctz_or_zero (d);
559 else if (pre_shift == -1)
561 unsigned int j;
562 for (j = 0; j < nunits; j++)
564 tree cst2 = VECTOR_CST_ELT (op1, j);
565 unsigned HOST_WIDE_INT d2;
566 int this_pre_shift;
568 if (!tree_fits_uhwi_p (cst2))
569 return NULL_TREE;
570 d2 = tree_to_uhwi (cst2) & mask;
571 if (d2 == 0)
572 return NULL_TREE;
573 this_pre_shift = floor_log2 (d2 & -d2);
574 if (pre_shift == -1 || this_pre_shift < pre_shift)
575 pre_shift = this_pre_shift;
577 if (i != 0 && pre_shift != 0)
579 /* Restart. */
580 i = -1U;
581 mode = -1;
582 continue;
585 if (pre_shift != 0)
587 if ((d >> pre_shift) <= 1)
589 mode = -2;
590 continue;
592 mh = choose_multiplier (d >> pre_shift, prec,
593 prec - pre_shift,
594 &ml, &post_shift, &dummy_int);
595 gcc_assert (!mh);
596 pre_shifts[i] = pre_shift;
599 if (!mh)
600 this_mode = 0;
601 else
602 this_mode = 1;
604 else
606 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
607 unsigned HOST_WIDE_INT abs_d;
609 if (d == -1)
610 return NULL_TREE;
612 /* Since d might be INT_MIN, we have to cast to
613 unsigned HOST_WIDE_INT before negating to avoid
614 undefined signed overflow. */
615 abs_d = (d >= 0
616 ? (unsigned HOST_WIDE_INT) d
617 : - (unsigned HOST_WIDE_INT) d);
619 /* n rem d = n rem -d */
620 if (code == TRUNC_MOD_EXPR && d < 0)
621 d = abs_d;
622 else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
624 /* This case is not handled correctly below. */
625 mode = -2;
626 continue;
628 if (abs_d <= 1)
630 mode = -2;
631 continue;
634 choose_multiplier (abs_d, prec, prec - 1, &ml,
635 &post_shift, &dummy_int);
636 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
638 this_mode = 4 + (d < 0);
639 ml |= HOST_WIDE_INT_M1U << (prec - 1);
641 else
642 this_mode = 2 + (d < 0);
644 mulc[i] = ml;
645 post_shifts[i] = post_shift;
646 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
647 || post_shift >= prec
648 || pre_shifts[i] >= prec)
649 this_mode = -2;
651 if (i == 0)
652 mode = this_mode;
653 else if (mode != this_mode)
654 mode = -2;
657 if (use_pow2)
659 tree addend = NULL_TREE;
660 if (sign_p == SIGNED)
662 tree uns_type;
664 /* Both division and remainder sequences need
665 op0 < 0 ? mask : 0 computed. It can be either computed as
666 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
667 if none of the shifts is 0, or as the conditional. */
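/* Editor's note: the addend implements round-toward-zero division by
   2**shifts[i], i.e. q = (op0 + (op0 < 0 ? (1 << shifts[i]) - 1 : 0))
   >> shifts[i]; the remainder is computed as
   ((op0 + addend) & mask) - addend further below.  */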
668 for (i = 0; i < nunits; i++)
669 if (shifts[i] == 0)
670 break;
671 uns_type
672 = build_vector_type (build_nonstandard_integer_type (prec, 1),
673 nunits);
674 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
676 for (i = 0; i < nunits; i++)
677 shift_temps[i] = prec - 1;
678 cur_op = add_rshift (gsi, type, op0, shift_temps);
679 if (cur_op != NULL_TREE)
681 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
682 uns_type, cur_op);
683 for (i = 0; i < nunits; i++)
684 shift_temps[i] = prec - shifts[i];
685 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
686 if (cur_op != NULL_TREE)
687 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
688 type, cur_op);
691 if (addend == NULL_TREE
692 && expand_vec_cond_expr_p (type, type, LT_EXPR))
694 tree zero, cst, cond, mask_type;
695 gimple *stmt;
697 mask_type = build_same_sized_truth_vector_type (type);
698 zero = build_zero_cst (type);
699 cond = build2 (LT_EXPR, mask_type, op0, zero);
700 tree_vector_builder vec (type, nunits, 1);
701 for (i = 0; i < nunits; i++)
702 vec.quick_push (build_int_cst (TREE_TYPE (type),
703 (HOST_WIDE_INT_1U
704 << shifts[i]) - 1));
705 cst = vec.build ();
706 addend = make_ssa_name (type);
707 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
708 cst, zero);
709 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
712 if (code == TRUNC_DIV_EXPR)
714 if (sign_p == UNSIGNED)
716 /* q = op0 >> shift; */
717 cur_op = add_rshift (gsi, type, op0, shifts);
718 if (cur_op != NULL_TREE)
719 return cur_op;
721 else if (addend != NULL_TREE)
723 /* t1 = op0 + addend;
724 q = t1 >> shift; */
725 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
726 if (op != unknown_optab
727 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
729 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
730 cur_op = add_rshift (gsi, type, cur_op, shifts);
731 if (cur_op != NULL_TREE)
732 return cur_op;
736 else
738 tree mask;
739 tree_vector_builder vec (type, nunits, 1);
740 for (i = 0; i < nunits; i++)
741 vec.quick_push (build_int_cst (TREE_TYPE (type),
742 (HOST_WIDE_INT_1U
743 << shifts[i]) - 1));
744 mask = vec.build ();
745 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
746 if (op != unknown_optab
747 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
749 if (sign_p == UNSIGNED)
750 /* r = op0 & mask; */
751 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
752 else if (addend != NULL_TREE)
754 /* t1 = op0 + addend;
755 t2 = t1 & mask;
756 r = t2 - addend; */
757 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
758 if (op != unknown_optab
759 && optab_handler (op, TYPE_MODE (type))
760 != CODE_FOR_nothing)
762 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
763 addend);
764 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
765 cur_op, mask);
766 op = optab_for_tree_code (MINUS_EXPR, type,
767 optab_default);
768 if (op != unknown_optab
769 && optab_handler (op, TYPE_MODE (type))
770 != CODE_FOR_nothing)
771 return gimplify_build2 (gsi, MINUS_EXPR, type,
772 cur_op, addend);
779 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
780 return NULL_TREE;
782 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
783 return NULL_TREE;
785 cur_op = op0;
787 switch (mode)
789 case 0:
790 gcc_assert (sign_p == UNSIGNED);
791 /* t1 = oprnd0 >> pre_shift;
792 t2 = t1 h* ml;
793 q = t2 >> post_shift; */
794 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
795 if (cur_op == NULL_TREE)
796 return NULL_TREE;
797 break;
798 case 1:
799 gcc_assert (sign_p == UNSIGNED);
800 for (i = 0; i < nunits; i++)
802 shift_temps[i] = 1;
803 post_shifts[i]--;
805 break;
806 case 2:
807 case 3:
808 case 4:
809 case 5:
810 gcc_assert (sign_p == SIGNED);
811 for (i = 0; i < nunits; i++)
812 shift_temps[i] = prec - 1;
813 break;
814 default:
815 return NULL_TREE;
818 tree_vector_builder vec (type, nunits, 1);
819 for (i = 0; i < nunits; i++)
820 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
821 mulcst = vec.build ();
823 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
825 switch (mode)
827 case 0:
828 /* t1 = oprnd0 >> pre_shift;
829 t2 = t1 h* ml;
830 q = t2 >> post_shift; */
831 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
832 break;
833 case 1:
834 /* t1 = oprnd0 h* ml;
835 t2 = oprnd0 - t1;
836 t3 = t2 >> 1;
837 t4 = t1 + t3;
838 q = t4 >> (post_shift - 1); */
839 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
840 if (op == unknown_optab
841 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
842 return NULL_TREE;
843 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
844 tem = add_rshift (gsi, type, tem, shift_temps);
845 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
846 if (op == unknown_optab
847 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
848 return NULL_TREE;
849 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
850 cur_op = add_rshift (gsi, type, tem, post_shifts);
851 if (cur_op == NULL_TREE)
852 return NULL_TREE;
853 break;
854 case 2:
855 case 3:
856 case 4:
857 case 5:
858 /* t1 = oprnd0 h* ml;
859 t2 = t1; [ iff (mode & 2) != 0 ]
860 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
861 t3 = t2 >> post_shift;
862 t4 = oprnd0 >> (prec - 1);
863 q = t3 - t4; [ iff (mode & 1) == 0 ]
864 q = t4 - t3; [ iff (mode & 1) != 0 ] */
865 if ((mode & 2) == 0)
867 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
868 if (op == unknown_optab
869 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
870 return NULL_TREE;
871 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
873 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
874 if (cur_op == NULL_TREE)
875 return NULL_TREE;
876 tem = add_rshift (gsi, type, op0, shift_temps);
877 if (tem == NULL_TREE)
878 return NULL_TREE;
879 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
880 if (op == unknown_optab
881 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
882 return NULL_TREE;
883 if ((mode & 1) == 0)
884 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
885 else
886 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
887 break;
888 default:
889 gcc_unreachable ();
892 if (code == TRUNC_DIV_EXPR)
893 return cur_op;
895 /* We divided. Now finish by:
896 t1 = q * oprnd1;
897 r = oprnd0 - t1; */
898 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
899 if (op == unknown_optab
900 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
901 return NULL_TREE;
902 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
903 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
904 if (op == unknown_optab
905 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
906 return NULL_TREE;
907 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
910 /* Expand a vector condition to scalars, by using many conditions
911 on the vector's elements. */
912 static void
913 expand_vector_condition (gimple_stmt_iterator *gsi)
915 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
916 tree type = gimple_expr_type (stmt);
917 tree a = gimple_assign_rhs1 (stmt);
918 tree a1 = a;
919 tree a2 = NULL_TREE;
920 bool a_is_comparison = false;
921 bool a_is_scalar_bitmask = false;
922 tree b = gimple_assign_rhs2 (stmt);
923 tree c = gimple_assign_rhs3 (stmt);
924 vec<constructor_elt, va_gc> *v;
925 tree constr;
926 tree inner_type = TREE_TYPE (type);
927 tree cond_type = TREE_TYPE (TREE_TYPE (a));
928 tree comp_inner_type = cond_type;
929 tree width = TYPE_SIZE (inner_type);
930 tree index = bitsize_int (0);
931 tree comp_width = width;
932 tree comp_index = index;
933 int i;
934 location_t loc = gimple_location (gsi_stmt (*gsi));
936 if (!is_gimple_val (a))
938 gcc_assert (COMPARISON_CLASS_P (a));
939 a_is_comparison = true;
940 a1 = TREE_OPERAND (a, 0);
941 a2 = TREE_OPERAND (a, 1);
942 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
943 comp_width = TYPE_SIZE (comp_inner_type);
946 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
947 return;
949 /* Handle vector boolean types with bitmasks. If there is a comparison
950 and we can expand the comparison into the vector boolean bitmask,
951 or otherwise if it is compatible with type, we can transform
952 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
953 into
954 tmp_6 = x_2 < y_3;
955 tmp_7 = tmp_6 & vbfld_4;
956 tmp_8 = ~tmp_6;
957 tmp_9 = tmp_8 & vbfld_5;
958 vbfld_1 = tmp_7 | tmp_9;
959 Similarly for vbfld_10 instead of x_2 < y_3. */
960 if (VECTOR_BOOLEAN_TYPE_P (type)
961 && SCALAR_INT_MODE_P (TYPE_MODE (type))
962 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
963 TYPE_VECTOR_SUBPARTS (type)
964 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
965 && (a_is_comparison
966 ? useless_type_conversion_p (type, TREE_TYPE (a))
967 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
969 if (a_is_comparison)
970 a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
971 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
972 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
973 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
974 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
975 gimple_assign_set_rhs_from_tree (gsi, a);
976 update_stmt (gsi_stmt (*gsi));
977 return;
980 /* TODO: try and find a smaller vector type. */
982 warning_at (loc, OPT_Wvector_operation_performance,
983 "vector condition will be expanded piecewise");
985 if (!a_is_comparison
986 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
987 && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
988 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
989 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
990 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
991 (TREE_TYPE (TREE_TYPE (a))))))
993 a_is_scalar_bitmask = true;
994 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
995 tree atype = build_nonstandard_integer_type (prec, 1);
996 a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
999 int nunits = nunits_for_known_piecewise_op (type);
1000 vec_alloc (v, nunits);
1001 for (i = 0; i < nunits; i++)
1003 tree aa, result;
1004 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1005 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1006 if (a_is_comparison)
1008 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1009 comp_width, comp_index);
1010 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1011 comp_width, comp_index);
1012 aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
1014 else if (a_is_scalar_bitmask)
1016 wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1017 result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1018 a, wide_int_to_tree (TREE_TYPE (a), w));
1019 aa = fold_build2 (NE_EXPR, boolean_type_node, result,
1020 build_zero_cst (TREE_TYPE (a)));
1022 else
1023 aa = tree_vec_extract (gsi, cond_type, a, width, index);
1024 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1025 constructor_elt ce = {NULL_TREE, result};
1026 v->quick_push (ce);
1027 index = int_const_binop (PLUS_EXPR, index, width);
1028 if (width == comp_width)
1029 comp_index = index;
1030 else
1031 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1034 constr = build_constructor (type, v);
1035 gimple_assign_set_rhs_from_tree (gsi, constr);
1036 update_stmt (gsi_stmt (*gsi));
1039 static tree
1040 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1041 gassign *assign, enum tree_code code)
1043 machine_mode compute_mode = TYPE_MODE (compute_type);
1045 /* If the compute mode is not a vector mode (hence we are not decomposing
1046 a BLKmode vector to smaller, hardware-supported vectors), we may want
1047 to expand the operations in parallel. */
1048 if (!VECTOR_MODE_P (compute_mode))
1049 switch (code)
1051 case PLUS_EXPR:
1052 case MINUS_EXPR:
1053 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1054 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1055 gimple_assign_rhs1 (assign),
1056 gimple_assign_rhs2 (assign), code);
1057 break;
1059 case NEGATE_EXPR:
1060 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1061 return expand_vector_addition (gsi, do_unop, do_negate, type,
1062 gimple_assign_rhs1 (assign),
1063 NULL_TREE, code);
1064 break;
1066 case BIT_AND_EXPR:
1067 case BIT_IOR_EXPR:
1068 case BIT_XOR_EXPR:
1069 return expand_vector_parallel (gsi, do_binop, type,
1070 gimple_assign_rhs1 (assign),
1071 gimple_assign_rhs2 (assign), code);
1073 case BIT_NOT_EXPR:
1074 return expand_vector_parallel (gsi, do_unop, type,
1075 gimple_assign_rhs1 (assign),
1076 NULL_TREE, code);
1077 case EQ_EXPR:
1078 case NE_EXPR:
1079 case GT_EXPR:
1080 case LT_EXPR:
1081 case GE_EXPR:
1082 case LE_EXPR:
1083 case UNEQ_EXPR:
1084 case UNGT_EXPR:
1085 case UNLT_EXPR:
1086 case UNGE_EXPR:
1087 case UNLE_EXPR:
1088 case LTGT_EXPR:
1089 case ORDERED_EXPR:
1090 case UNORDERED_EXPR:
1092 tree rhs1 = gimple_assign_rhs1 (assign);
1093 tree rhs2 = gimple_assign_rhs2 (assign);
1095 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1098 case TRUNC_DIV_EXPR:
1099 case TRUNC_MOD_EXPR:
1101 tree rhs1 = gimple_assign_rhs1 (assign);
1102 tree rhs2 = gimple_assign_rhs2 (assign);
1103 tree ret;
1105 if (!optimize
1106 || !VECTOR_INTEGER_TYPE_P (type)
1107 || TREE_CODE (rhs2) != VECTOR_CST
1108 || !VECTOR_MODE_P (TYPE_MODE (type)))
1109 break;
1111 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1112 if (ret != NULL_TREE)
1113 return ret;
1114 break;
1117 default:
1118 break;
1121 if (TREE_CODE_CLASS (code) == tcc_unary)
1122 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1123 gimple_assign_rhs1 (assign),
1124 NULL_TREE, code);
1125 else
1126 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1127 gimple_assign_rhs1 (assign),
1128 gimple_assign_rhs2 (assign), code);
1131 /* Try to optimize
1132 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1133 style stmts into:
1134 _9 = { b_7, b_7, b_7, b_7 };
1135 a_5 = _9 + { 0, 3, 6, 9 };
1136 because vector splat operation is usually more efficient
1137 than piecewise initialization of the vector. */
1139 static void
1140 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1142 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1143 tree lhs = gimple_assign_lhs (stmt);
1144 tree rhs = gimple_assign_rhs1 (stmt);
1145 tree type = TREE_TYPE (rhs);
1146 unsigned int i, j;
1147 unsigned HOST_WIDE_INT nelts;
1148 bool all_same = true;
1149 constructor_elt *elt;
1150 gimple *g;
1151 tree base = NULL_TREE;
1152 optab op;
1154 if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1155 || nelts <= 2
1156 || CONSTRUCTOR_NELTS (rhs) != nelts)
1157 return;
1158 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1159 if (op == unknown_optab
1160 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1161 return;
1162 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1163 if (TREE_CODE (elt->value) != SSA_NAME
1164 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1165 return;
1166 else
1168 tree this_base = elt->value;
1169 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1170 all_same = false;
1171 for (j = 0; j < nelts + 1; j++)
1173 g = SSA_NAME_DEF_STMT (this_base);
1174 if (is_gimple_assign (g)
1175 && gimple_assign_rhs_code (g) == PLUS_EXPR
1176 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1177 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1178 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1179 this_base = gimple_assign_rhs1 (g);
1180 else
1181 break;
1183 if (i == 0)
1184 base = this_base;
1185 else if (this_base != base)
1186 return;
1188 if (all_same)
1189 return;
1190 tree_vector_builder cst (type, nelts, 1);
1191 for (i = 0; i < nelts; i++)
1193 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1194 tree elt = build_zero_cst (TREE_TYPE (base));
1195 while (this_base != base)
1197 g = SSA_NAME_DEF_STMT (this_base);
1198 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1199 elt, gimple_assign_rhs2 (g));
1200 if (elt == NULL_TREE
1201 || TREE_CODE (elt) != INTEGER_CST
1202 || TREE_OVERFLOW (elt))
1203 return;
1204 this_base = gimple_assign_rhs1 (g);
1206 cst.quick_push (elt);
1208 for (i = 0; i < nelts; i++)
1209 CONSTRUCTOR_ELT (rhs, i)->value = base;
1210 g = gimple_build_assign (make_ssa_name (type), rhs);
1211 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1212 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1213 cst.build ());
1214 gsi_replace (gsi, g, false);
1217 /* Return a type for the widest vector mode whose components are of type
1218 TYPE, or NULL_TREE if none is found. */
1220 static tree
1221 type_for_widest_vector_mode (tree type, optab op)
1223 machine_mode inner_mode = TYPE_MODE (type);
1224 machine_mode best_mode = VOIDmode, mode;
1225 poly_int64 best_nunits = 0;
1227 if (SCALAR_FLOAT_MODE_P (inner_mode))
1228 mode = MIN_MODE_VECTOR_FLOAT;
1229 else if (SCALAR_FRACT_MODE_P (inner_mode))
1230 mode = MIN_MODE_VECTOR_FRACT;
1231 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1232 mode = MIN_MODE_VECTOR_UFRACT;
1233 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1234 mode = MIN_MODE_VECTOR_ACCUM;
1235 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1236 mode = MIN_MODE_VECTOR_UACCUM;
1237 else if (inner_mode == BImode)
1238 mode = MIN_MODE_VECTOR_BOOL;
1239 else
1240 mode = MIN_MODE_VECTOR_INT;
1242 FOR_EACH_MODE_FROM (mode, mode)
1243 if (GET_MODE_INNER (mode) == inner_mode
1244 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1245 && optab_handler (op, mode) != CODE_FOR_nothing)
1246 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1248 if (best_mode == VOIDmode)
1249 return NULL_TREE;
1250 else
1251 return build_vector_type_for_mode (type, best_mode);
1255 /* Build a reference to the element of the vector VECT. The function
1256 returns either the element itself, a BIT_FIELD_REF, or an
1257 ARRAY_REF expression.
1259 GSI is required to insert temporary variables while building a
1260 reference to the element of the vector VECT.
1262 PTMPVEC is a pointer to a temporary variable used for caching
1263 purposes. If PTMPVEC is NULL, a new temporary variable
1264 will be created. */
1265 static tree
1266 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1268 tree vect_type, vect_elt_type;
1269 gimple *asgn;
1270 tree tmpvec;
1271 tree arraytype;
1272 bool need_asgn = true;
1273 unsigned int elements;
1275 vect_type = TREE_TYPE (vect);
1276 vect_elt_type = TREE_TYPE (vect_type);
1277 elements = nunits_for_known_piecewise_op (vect_type);
1279 if (TREE_CODE (idx) == INTEGER_CST)
1281 unsigned HOST_WIDE_INT index;
1283 /* Given that we're about to compute a binary modulus,
1284 we don't care about the high bits of the value. */
1285 index = TREE_INT_CST_LOW (idx);
1286 if (!tree_fits_uhwi_p (idx) || index >= elements)
1288 index &= elements - 1;
1289 idx = build_int_cst (TREE_TYPE (idx), index);
1292 /* When lowering a vector statement sequence do some easy
1293 simplification by looking through intermediate vector results. */
1294 if (TREE_CODE (vect) == SSA_NAME)
1296 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1297 if (is_gimple_assign (def_stmt)
1298 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1299 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1300 vect = gimple_assign_rhs1 (def_stmt);
1303 if (TREE_CODE (vect) == VECTOR_CST)
1304 return VECTOR_CST_ELT (vect, index);
1305 else if (TREE_CODE (vect) == CONSTRUCTOR
1306 && (CONSTRUCTOR_NELTS (vect) == 0
1307 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1308 != VECTOR_TYPE))
1310 if (index < CONSTRUCTOR_NELTS (vect))
1311 return CONSTRUCTOR_ELT (vect, index)->value;
1312 return build_zero_cst (vect_elt_type);
1314 else
1316 tree size = TYPE_SIZE (vect_elt_type);
1317 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1318 size);
1319 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1323 if (!ptmpvec)
1324 tmpvec = create_tmp_var (vect_type, "vectmp");
1325 else if (!*ptmpvec)
1326 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1327 else
1329 tmpvec = *ptmpvec;
1330 need_asgn = false;
1333 if (need_asgn)
1335 TREE_ADDRESSABLE (tmpvec) = 1;
1336 asgn = gimple_build_assign (tmpvec, vect);
1337 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1340 arraytype = build_array_type_nelts (vect_elt_type, elements);
1341 return build4 (ARRAY_REF, vect_elt_type,
1342 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1343 idx, NULL_TREE, NULL_TREE);
1346 /* Check if VEC_PERM_EXPR within the given setting is supported
1347 by hardware, or lower it piecewise.
1349 When VEC_PERM_EXPR has the same first and second operands:
1350 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1351 {v0[mask[0]], v0[mask[1]], ...}
1352 MASK and V0 must have the same number of elements.
1354 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1355 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1356 V0 and V1 must have the same type. MASK, V0, V1 must have the
1357 same number of elements. */
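/* Editor's illustration: with v0 = {a,b,c,d}, v1 = {e,f,g,h} and
   mask = {0,5,2,7}, the lowered constructor selects
   {v0[0], v1[5 & 3], v0[2], v1[7 & 3]} = {a, f, c, h}; indices of
   ELEMENTS or more select from the second operand and are reduced
   modulo ELEMENTS.  */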
1359 static void
1360 lower_vec_perm (gimple_stmt_iterator *gsi)
1362 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1363 tree mask = gimple_assign_rhs3 (stmt);
1364 tree vec0 = gimple_assign_rhs1 (stmt);
1365 tree vec1 = gimple_assign_rhs2 (stmt);
1366 tree vect_type = TREE_TYPE (vec0);
1367 tree mask_type = TREE_TYPE (mask);
1368 tree vect_elt_type = TREE_TYPE (vect_type);
1369 tree mask_elt_type = TREE_TYPE (mask_type);
1370 unsigned HOST_WIDE_INT elements;
1371 vec<constructor_elt, va_gc> *v;
1372 tree constr, t, si, i_val;
1373 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1374 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1375 location_t loc = gimple_location (gsi_stmt (*gsi));
1376 unsigned i;
1378 if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1379 return;
1381 if (TREE_CODE (mask) == SSA_NAME)
1383 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1384 if (is_gimple_assign (def_stmt)
1385 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1386 mask = gimple_assign_rhs1 (def_stmt);
1389 vec_perm_builder sel_int;
1391 if (TREE_CODE (mask) == VECTOR_CST
1392 && tree_to_vec_perm_builder (&sel_int, mask))
1394 vec_perm_indices indices (sel_int, 2, elements);
1395 if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1397 gimple_assign_set_rhs3 (stmt, mask);
1398 update_stmt (stmt);
1399 return;
1401 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1402 vector as VEC1 and a right element shift MASK. */
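/* Editor's note: e.g. for a 4-element vector, a zero VEC1 together
   with mask {1,2,3,4} selects {v0[1], v0[2], v0[3], 0}, the
   whole-vector element shift that vec_shr_optab can perform
   directly.  */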
1403 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1404 != CODE_FOR_nothing
1405 && TREE_CODE (vec1) == VECTOR_CST
1406 && initializer_zerop (vec1)
1407 && maybe_ne (indices[0], 0)
1408 && known_lt (poly_uint64 (indices[0]), elements))
1410 bool ok_p = indices.series_p (0, 1, indices[0], 1);
1411 if (!ok_p)
1413 for (i = 1; i < elements; ++i)
1415 poly_uint64 actual = indices[i];
1416 poly_uint64 expected = i + indices[0];
1417 /* Indices into the second vector are all equivalent. */
1418 if (maybe_lt (actual, elements)
1419 ? maybe_ne (actual, expected)
1420 : maybe_lt (expected, elements))
1421 break;
1423 ok_p = i == elements;
1425 if (ok_p)
1427 gimple_assign_set_rhs3 (stmt, mask);
1428 update_stmt (stmt);
1429 return;
1432 /* And similarly vec_shl pattern. */
1433 if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1434 != CODE_FOR_nothing
1435 && TREE_CODE (vec0) == VECTOR_CST
1436 && initializer_zerop (vec0))
1438 unsigned int first = 0;
1439 for (i = 0; i < elements; ++i)
1440 if (known_eq (poly_uint64 (indices[i]), elements))
1442 if (i == 0 || first)
1443 break;
1444 first = i;
1446 else if (first
1447 ? maybe_ne (poly_uint64 (indices[i]),
1448 elements + i - first)
1449 : maybe_ge (poly_uint64 (indices[i]), elements))
1450 break;
1451 if (i == elements)
1453 gimple_assign_set_rhs3 (stmt, mask);
1454 update_stmt (stmt);
1455 return;
1459 else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1460 return;
1462 warning_at (loc, OPT_Wvector_operation_performance,
1463 "vector shuffling operation will be expanded piecewise");
1465 vec_alloc (v, elements);
1466 for (i = 0; i < elements; i++)
1468 si = size_int (i);
1469 i_val = vector_element (gsi, mask, si, &masktmp);
1471 if (TREE_CODE (i_val) == INTEGER_CST)
1473 unsigned HOST_WIDE_INT index;
1475 index = TREE_INT_CST_LOW (i_val);
1476 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1477 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1479 if (two_operand_p && (index & elements) != 0)
1480 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1481 else
1482 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1484 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1485 true, GSI_SAME_STMT);
1487 else
1489 tree cond = NULL_TREE, v0_val;
1491 if (two_operand_p)
1493 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1494 build_int_cst (mask_elt_type, elements));
1495 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1496 true, GSI_SAME_STMT);
1499 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1500 build_int_cst (mask_elt_type, elements - 1));
1501 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1502 true, GSI_SAME_STMT);
1504 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1505 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1506 true, GSI_SAME_STMT);
1508 if (two_operand_p)
1510 tree v1_val;
1512 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1513 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1514 true, GSI_SAME_STMT);
1516 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1517 cond, build_zero_cst (mask_elt_type));
1518 cond = fold_build3 (COND_EXPR, vect_elt_type,
1519 cond, v0_val, v1_val);
1520 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1521 true, GSI_SAME_STMT);
1523 else
1524 t = v0_val;
1527 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1530 constr = build_constructor (vect_type, v);
1531 gimple_assign_set_rhs_from_tree (gsi, constr);
1532 update_stmt (gsi_stmt (*gsi));
1535 /* If OP is a uniform vector return the element it is a splat from. */
1537 static tree
1538 ssa_uniform_vector_p (tree op)
1540 if (TREE_CODE (op) == VECTOR_CST
1541 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1542 || TREE_CODE (op) == CONSTRUCTOR)
1543 return uniform_vector_p (op);
1544 if (TREE_CODE (op) == SSA_NAME)
1546 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1547 if (gimple_assign_single_p (def_stmt))
1548 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1550 return NULL_TREE;
1553 /* Return the type in which the operation CODE with optab OP can be
1554 computed. */
1556 static tree
1557 get_compute_type (enum tree_code code, optab op, tree type)
1559 /* For very wide vectors, try using a smaller vector mode. */
1560 tree compute_type = type;
1561 if (op
1562 && (!VECTOR_MODE_P (TYPE_MODE (type))
1563 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1565 tree vector_compute_type
1566 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1567 if (vector_compute_type != NULL_TREE
1568 && subparts_gt (compute_type, vector_compute_type)
1569 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1570 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1571 != CODE_FOR_nothing))
1572 compute_type = vector_compute_type;
1575 /* If we are breaking a BLKmode vector into smaller pieces,
1576 type_for_widest_vector_mode has already looked into the optab,
1577 so skip these checks. */
1578 if (compute_type == type)
1580 machine_mode compute_mode = TYPE_MODE (compute_type);
1581 if (VECTOR_MODE_P (compute_mode))
1583 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1584 return compute_type;
1585 if (code == MULT_HIGHPART_EXPR
1586 && can_mult_highpart_p (compute_mode,
1587 TYPE_UNSIGNED (compute_type)))
1588 return compute_type;
1590 /* There is no operation in hardware, so fall back to scalars. */
1591 compute_type = TREE_TYPE (type);
1594 return compute_type;
1597 static tree
1598 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1599 tree bitpos, tree bitsize, enum tree_code code,
1600 tree type ATTRIBUTE_UNUSED)
1602 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1603 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1604 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1605 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1606 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1607 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1610 /* Expand a vector COND_EXPR to scalars, piecewise. */
1611 static void
1612 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1614 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1615 tree type = gimple_expr_type (stmt);
1616 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1617 machine_mode compute_mode = TYPE_MODE (compute_type);
1618 gcc_assert (compute_mode != BLKmode);
1619 tree lhs = gimple_assign_lhs (stmt);
1620 tree rhs2 = gimple_assign_rhs2 (stmt);
1621 tree rhs3 = gimple_assign_rhs3 (stmt);
1622 tree new_rhs;
1624 /* If the compute mode is not a vector mode (hence we are not decomposing
1625 a BLKmode vector to smaller, hardware-supported vectors), we may want
1626 to expand the operations in parallel. */
1627 if (!VECTOR_MODE_P (compute_mode))
1628 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1629 COND_EXPR);
1630 else
1631 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1632 rhs2, rhs3, COND_EXPR);
1633 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1634 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1635 new_rhs);
1637 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1638 way to do it is change expand_vector_operation and its callees to
1639 return a tree_code, RHS1 and RHS2 instead of a tree. */
1640 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1641 update_stmt (gsi_stmt (*gsi));
1644 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1645 lowering. If INNER_TYPE is not a vector type, this is a scalar
1646 fallback. */
1648 static tree
1649 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1650 tree decl, tree bitpos, tree bitsize,
1651 enum tree_code code, tree type)
1653 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1654 if (!VECTOR_TYPE_P (inner_type))
1655 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1656 if (code == CALL_EXPR)
1658 gimple *g = gimple_build_call (decl, 1, a);
1659 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1660 gimple_call_set_lhs (g, lhs);
1661 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1662 return lhs;
1664 else
1666 tree outer_type = build_vector_type (TREE_TYPE (type),
1667 TYPE_VECTOR_SUBPARTS (inner_type));
1668 return gimplify_build1 (gsi, code, outer_type, a);
1672 /* Similarly, but for narrowing conversion. */
1674 static tree
1675 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1676 tree, tree bitpos, tree, enum tree_code code,
1677 tree type)
1679 tree itype = build_vector_type (TREE_TYPE (inner_type),
1680 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1681 2));
1682 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1683 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1684 int_const_binop (PLUS_EXPR, bitpos,
1685 TYPE_SIZE (itype)));
1686 tree outer_type = build_vector_type (TREE_TYPE (type),
1687 TYPE_VECTOR_SUBPARTS (inner_type));
1688 return gimplify_build2 (gsi, code, outer_type, b, c);
1691 /* Expand VEC_CONVERT ifn call. */
1693 static void
1694 expand_vector_conversion (gimple_stmt_iterator *gsi)
1696 gimple *stmt = gsi_stmt (*gsi);
1697 gimple *g;
1698 tree lhs = gimple_call_lhs (stmt);
1699 tree arg = gimple_call_arg (stmt, 0);
1700 tree decl = NULL_TREE;
1701 tree ret_type = TREE_TYPE (lhs);
1702 tree arg_type = TREE_TYPE (arg);
1703 tree new_rhs, compute_type = TREE_TYPE (arg_type);
1704 enum tree_code code = NOP_EXPR;
1705 enum tree_code code1 = ERROR_MARK;
1706 enum { NARROW, NONE, WIDEN } modifier = NONE;
1707 optab optab1 = unknown_optab;
1709 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1710 gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
1711 gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
1712 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1713 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1714 code = FIX_TRUNC_EXPR;
1715 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1716 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1717 code = FLOAT_EXPR;
1718 if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
1719 < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
1720 modifier = NARROW;
1721 else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
1722 > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
1723 modifier = WIDEN;
1725 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1727 if (supportable_convert_operation (code, ret_type, arg_type, &decl,
1728 &code1))
1730 if (code1 == CALL_EXPR)
1732 g = gimple_build_call (decl, 1, arg);
1733 gimple_call_set_lhs (g, lhs);
1735 else
1736 g = gimple_build_assign (lhs, code1, arg);
1737 gsi_replace (gsi, g, false);
1738 return;
1740 /* Can't use get_compute_type here, as supportable_convert_operation
1741 doesn't necessarily use an optab and needs two arguments. */
1742 tree vec_compute_type
1743 = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
1744 if (vec_compute_type
1745 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
1746 && subparts_gt (arg_type, vec_compute_type))
1748 unsigned HOST_WIDE_INT nelts
1749 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1750 while (nelts > 1)
1752 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1753 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1754 if (supportable_convert_operation (code, ret1_type, arg1_type,
1755 &decl, &code1))
1757 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1758 ret_type, arg1_type, arg,
1759 decl, code1);
1760 g = gimple_build_assign (lhs, new_rhs);
1761 gsi_replace (gsi, g, false);
1762 return;
1764 nelts = nelts / 2;
1768 else if (modifier == NARROW)
1770 switch (code)
1772 CASE_CONVERT:
1773 code1 = VEC_PACK_TRUNC_EXPR;
1774 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1775 break;
1776 case FIX_TRUNC_EXPR:
1777 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1778 /* The signedness is determined from output operand. */
1779 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1780 break;
1781 case FLOAT_EXPR:
1782 code1 = VEC_PACK_FLOAT_EXPR;
1783 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1784 break;
1785 default:
1786 gcc_unreachable ();
1789 if (optab1)
1790 compute_type = get_compute_type (code1, optab1, arg_type);
1791 enum insn_code icode1;
1792 if (VECTOR_TYPE_P (compute_type)
1793 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1794 != CODE_FOR_nothing)
1795 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1797 tree cretd_type
1798 = build_vector_type (TREE_TYPE (ret_type),
1799 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1800 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1802 if (compute_type == arg_type)
1804 new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1805 arg, build_zero_cst (arg_type));
1806 new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1807 TYPE_SIZE (ret_type),
1808 bitsize_int (0));
1809 g = gimple_build_assign (lhs, new_rhs);
1810 gsi_replace (gsi, g, false);
1811 return;
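/* Otherwise view the argument as pieces of DCOMPUTE_TYPE, twice the
   length of COMPUTE_TYPE, so that one pack can combine the two halves
   of each piece; convert piece by piece, using the generic piecewise
   expansion when the argument is wider than a single piece.  */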
1813 tree dcompute_type
1814 = build_vector_type (TREE_TYPE (compute_type),
1815 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1816 if (TYPE_MAIN_VARIANT (dcompute_type)
1817 == TYPE_MAIN_VARIANT (arg_type))
1818 new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1819 NULL_TREE, bitsize_int (0),
1820 NULL_TREE, code1,
1821 ret_type);
1822 else
1823 new_rhs = expand_vector_piecewise (gsi,
1824 do_vec_narrow_conversion,
1825 arg_type, dcompute_type,
1826 arg, NULL_TREE, code1,
1827 ret_type);
1828 g = gimple_build_assign (lhs, new_rhs);
1829 gsi_replace (gsi, g, false);
1830 return;
1834 else if (modifier == WIDEN)
1836 enum tree_code code2 = ERROR_MARK;
1837 optab optab2 = unknown_optab;
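/* A widening conversion yields two result vectors per argument vector:
   CODE1 converts the low-half elements and CODE2 the high-half
   elements.  */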
1838 switch (code)
1840 CASE_CONVERT:
1841 code1 = VEC_UNPACK_LO_EXPR;
1842 code2 = VEC_UNPACK_HI_EXPR;
1843 break;
1844 case FIX_TRUNC_EXPR:
1845 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1846 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1847 break;
1848 case FLOAT_EXPR:
1849 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1850 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1851 break;
1852 default:
1853 gcc_unreachable ();
1855 if (BYTES_BIG_ENDIAN)
1856 std::swap (code1, code2);
1858 if (code == FIX_TRUNC_EXPR)
1860 /* The signedness is determined from the output operand. */
1861 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1862 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
1864 else
1866 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1867 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
1870 if (optab1 && optab2)
1871 compute_type = get_compute_type (code1, optab1, arg_type);
1873 enum insn_code icode1, icode2;
1874 if (VECTOR_TYPE_P (compute_type)
1875 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1876 != CODE_FOR_nothing)
1877 && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
1878 != CODE_FOR_nothing)
1879 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
1880 && (insn_data[icode1].operand[0].mode
1881 == insn_data[icode2].operand[0].mode))
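/* One unpack consumes a COMPUTE_TYPE vector and produces CRETD_TYPE,
   which has half as many elements, widened to the result element
   type.  */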
1883 poly_uint64 nunits
1884 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
1885 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
1886 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1888 vec<constructor_elt, va_gc> *v;
1889 tree part_width = TYPE_SIZE (compute_type);
1890 tree index = bitsize_int (0);
1891 int nunits = nunits_for_known_piecewise_op (arg_type);
1892 int delta = tree_to_uhwi (part_width)
1893 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
1894 int i;
1895 location_t loc = gimple_location (gsi_stmt (*gsi));
1897 if (compute_type != arg_type)
1898 warning_at (loc, OPT_Wvector_operation_performance,
1899 "vector operation will be expanded piecewise");
1900 else
1902 nunits = 1;
1903 delta = 1;
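/* Build the result as a CONSTRUCTOR: every COMPUTE_TYPE-sized chunk of
   the argument contributes two elements, its lo- and hi-unpacked
   halves, so the constructor holds twice as many elements as there are
   chunks.  */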
1906 vec_alloc (v, (nunits + delta - 1) / delta * 2);
1907 for (i = 0; i < nunits;
1908 i += delta, index = int_const_binop (PLUS_EXPR, index,
1909 part_width))
1911 tree a = arg;
1912 if (compute_type != arg_type)
1913 a = tree_vec_extract (gsi, compute_type, a, part_width,
1914 index);
1915 tree result = gimplify_build1 (gsi, code1, cretd_type, a);
1916 constructor_elt ce = { NULL_TREE, result };
1917 v->quick_push (ce);
1918 ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
1919 v->quick_push (ce);
1922 new_rhs = build_constructor (ret_type, v);
1923 g = gimple_build_assign (lhs, new_rhs);
1924 gsi_replace (gsi, g, false);
1925 return;
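/* None of the vector patterns applied; fall back to converting the
   elements one at a time.  */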
1930 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
1931 TREE_TYPE (arg_type), arg,
1932 NULL_TREE, code, ret_type);
1933 g = gimple_build_assign (lhs, new_rhs);
1934 gsi_replace (gsi, g, false);
1937 /* Process one statement. If we identify a vector operation, expand it. */
1939 static void
1940 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1942 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1943 enum tree_code code;
1944 optab op = unknown_optab;
1945 enum gimple_rhs_class rhs_class;
1946 tree new_rhs;
1948 /* Only GIMPLE_ASSIGN statements are lowered below; the sole call handled here is the IFN_VEC_CONVERT internal function. */
1949 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1950 if (!stmt)
1952 if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
1953 expand_vector_conversion (gsi);
1954 return;
1957 code = gimple_assign_rhs_code (stmt);
1958 rhs_class = get_gimple_rhs_class (code);
1959 lhs = gimple_assign_lhs (stmt);
1961 if (code == VEC_PERM_EXPR)
1963 lower_vec_perm (gsi);
1964 return;
1967 if (code == VEC_COND_EXPR)
1969 expand_vector_condition (gsi);
1970 return;
1973 if (code == COND_EXPR
1974 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1975 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1977 expand_vector_scalar_condition (gsi);
1978 return;
1981 if (code == CONSTRUCTOR
1982 && TREE_CODE (lhs) == SSA_NAME
1983 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1984 && !gimple_clobber_p (stmt)
1985 && optimize)
1987 optimize_vector_constructor (gsi);
1988 return;
1991 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1992 return;
1994 rhs1 = gimple_assign_rhs1 (stmt);
1995 type = gimple_expr_type (stmt);
1996 if (rhs_class == GIMPLE_BINARY_RHS)
1997 rhs2 = gimple_assign_rhs2 (stmt);
1999 if (!VECTOR_TYPE_P (type)
2000 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2001 return;
2003 /* A scalar operation pretending to be a vector one. */
2004 if (VECTOR_BOOLEAN_TYPE_P (type)
2005 && !VECTOR_MODE_P (TYPE_MODE (type))
2006 && TYPE_MODE (type) != BLKmode
2007 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2008 || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2009 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2010 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2011 return;
2013 /* If the vector operation operates on identical vector elements,
2014 implement it as a scalar operation followed by a splat, provided
2015 the target supports the scalar operation. */
2016 tree srhs1, srhs2 = NULL_TREE;
2017 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2018 && (rhs2 == NULL_TREE
2019 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2020 && (srhs2 = rhs2))
2021 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2022 /* As we query direct optabs, restrict to non-convert operations. */
2023 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2025 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2026 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2027 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2029 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
2030 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
2031 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
2032 gimple_assign_set_rhs_from_tree (gsi,
2033 build_vector_from_val (type, slhs));
2034 update_stmt (stmt);
2035 return;
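/* Conversion codes on assignments are not scalarized here; the vector
   conversions this pass lowers arrive as IFN_VEC_CONVERT calls and go
   through expand_vector_conversion above.  */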
2039 if (CONVERT_EXPR_CODE_P (code)
2040 || code == FLOAT_EXPR
2041 || code == FIX_TRUNC_EXPR
2042 || code == VIEW_CONVERT_EXPR)
2043 return;
2045 /* The signedness is determined from the input argument. */
2046 if (code == VEC_UNPACK_FLOAT_HI_EXPR
2047 || code == VEC_UNPACK_FLOAT_LO_EXPR
2048 || code == VEC_PACK_FLOAT_EXPR)
2050 /* We do not know how to scalarize those. */
2051 return;
2054 /* For widening/narrowing vector operations, the relevant type is that
2055 of the arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR
2056 is handled in the same way above. */
2057 if (code == WIDEN_SUM_EXPR
2058 || code == VEC_WIDEN_MULT_HI_EXPR
2059 || code == VEC_WIDEN_MULT_LO_EXPR
2060 || code == VEC_WIDEN_MULT_EVEN_EXPR
2061 || code == VEC_WIDEN_MULT_ODD_EXPR
2062 || code == VEC_UNPACK_HI_EXPR
2063 || code == VEC_UNPACK_LO_EXPR
2064 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2065 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2066 || code == VEC_PACK_TRUNC_EXPR
2067 || code == VEC_PACK_SAT_EXPR
2068 || code == VEC_PACK_FIX_TRUNC_EXPR
2069 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2070 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2072 /* We do not know how to scalarize those. */
2073 return;
2076 /* Choose between vector shift/rotate by vector and vector shift/rotate by
2077 scalar. */
2078 if (code == LSHIFT_EXPR
2079 || code == RSHIFT_EXPR
2080 || code == LROTATE_EXPR
2081 || code == RROTATE_EXPR)
2083 optab opv;
2085 /* Check whether we have vector <op> {x,x,x,x} where x
2086 could be a scalar variable or a constant. Transform
2087 vector <op> {x,x,x,x} ==> vector <op> scalar. */
2088 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2090 tree first;
2092 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2094 gimple_assign_set_rhs2 (stmt, first);
2095 update_stmt (stmt);
2096 rhs2 = first;
2100 opv = optab_for_tree_code (code, type, optab_vector);
2101 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2102 op = opv;
2103 else
2105 op = optab_for_tree_code (code, type, optab_scalar);
2107 compute_type = get_compute_type (code, op, type);
2108 if (compute_type == type)
2109 return;
2110 /* The rtl expander will expand vector/scalar as vector/vector
2111 if necessary. Pick the one with the wider vector type. */
2112 tree compute_vtype = get_compute_type (code, opv, type);
2113 if (subparts_gt (compute_vtype, compute_type))
2115 compute_type = compute_vtype;
2116 op = opv;
2120 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2122 if (compute_type == NULL_TREE)
2123 compute_type = get_compute_type (code, op, type);
2124 if (compute_type == type)
2125 return;
2126 /* Before splitting vector rotates into scalar rotates,
2127 see if we can use vector shifts and BIT_IOR_EXPR
2128 instead. For vector-by-vector rotates we would also
2129 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
2130 for now, since fold does not seem to create such rotates anyway. */
2131 if (compute_type == TREE_TYPE (type)
2132 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2134 optab oplv = vashl_optab, opl = ashl_optab;
2135 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2136 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2137 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2138 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2139 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2140 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2141 /* The rtl expander will expand vector/scalar as vector/vector
2142 if necessary. Pick the one with the wider vector type. */
2143 if (subparts_gt (compute_lvtype, compute_ltype))
2145 compute_ltype = compute_lvtype;
2146 opl = oplv;
2148 if (subparts_gt (compute_rvtype, compute_rtype))
2150 compute_rtype = compute_rvtype;
2151 opr = oprv;
2153 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2154 BIT_IOR_EXPR. */
2155 compute_type = compute_ltype;
2156 if (subparts_gt (compute_type, compute_rtype))
2157 compute_type = compute_rtype;
2158 if (subparts_gt (compute_type, compute_otype))
2159 compute_type = compute_otype;
2160 /* Verify that all three operations can be performed in that type. */
2161 if (compute_type != TREE_TYPE (type))
2163 if (optab_handler (opl, TYPE_MODE (compute_type))
2164 == CODE_FOR_nothing
2165 || optab_handler (opr, TYPE_MODE (compute_type))
2166 == CODE_FOR_nothing
2167 || optab_handler (opo, TYPE_MODE (compute_type))
2168 == CODE_FOR_nothing)
2169 compute_type = TREE_TYPE (type);
2174 else
2175 op = optab_for_tree_code (code, type, optab_default);
2177 /* Optabs will try converting a negation into a subtraction, so
2178 look for it as well. TODO: negation of floating-point vectors
2179 might be turned into an exclusive OR toggling the sign bit. */
2180 if (op == unknown_optab
2181 && code == NEGATE_EXPR
2182 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2183 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2185 if (compute_type == NULL_TREE)
2186 compute_type = get_compute_type (code, op, type);
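/* If the target can handle the operation on the full vector type there
   is nothing to lower.  */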
2187 if (compute_type == type)
2188 return;
2190 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
2192 /* Leave expression untouched for later expansion. */
2193 if (new_rhs == NULL_TREE)
2194 return;
2196 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2197 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2198 new_rhs);
2200 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2201 way to do it is to change expand_vector_operation and its callees to
2202 return a tree_code, RHS1 and RHS2 instead of a tree. */
2203 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2204 update_stmt (gsi_stmt (*gsi));
2207 /* Use this to lower vector operations introduced by the vectorizer,
2208 in case they need the bit-twiddling tricks implemented in this file. */
2210 static unsigned int
2211 expand_vector_operations (void)
2213 gimple_stmt_iterator gsi;
2214 basic_block bb;
2215 bool cfg_changed = false;
2217 FOR_EACH_BB_FN (bb, cfun)
2219 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2221 expand_vector_operations_1 (&gsi);
2222 /* ??? If we do not clean up EH then we will ICE in
2223 verification. But in reality we have created wrong code,
2224 as we did not properly transition EH info and edges to
2225 the piecewise computations. */
2226 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2227 && gimple_purge_dead_eh_edges (bb))
2228 cfg_changed = true;
2232 return cfg_changed ? TODO_cleanup_cfg : 0;
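/* The lowering is exposed as two passes sharing the same execute
   logic: "veclower" is gated on PROP_gimple_lvec not yet being
   provided, while "veclower2" (below) has a clone method so it can
   appear more than once in the pass pipeline and additionally
   schedules a CFG cleanup.  */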
2235 namespace {
2237 const pass_data pass_data_lower_vector =
2239 GIMPLE_PASS, /* type */
2240 "veclower", /* name */
2241 OPTGROUP_VEC, /* optinfo_flags */
2242 TV_NONE, /* tv_id */
2243 PROP_cfg, /* properties_required */
2244 PROP_gimple_lvec, /* properties_provided */
2245 0, /* properties_destroyed */
2246 0, /* todo_flags_start */
2247 TODO_update_ssa, /* todo_flags_finish */
2250 class pass_lower_vector : public gimple_opt_pass
2252 public:
2253 pass_lower_vector (gcc::context *ctxt)
2254 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2257 /* opt_pass methods: */
2258 virtual bool gate (function *fun)
2260 return !(fun->curr_properties & PROP_gimple_lvec);
2263 virtual unsigned int execute (function *)
2265 return expand_vector_operations ();
2268 }; // class pass_lower_vector
2270 } // anon namespace
2272 gimple_opt_pass *
2273 make_pass_lower_vector (gcc::context *ctxt)
2275 return new pass_lower_vector (ctxt);
2278 namespace {
2280 const pass_data pass_data_lower_vector_ssa =
2282 GIMPLE_PASS, /* type */
2283 "veclower2", /* name */
2284 OPTGROUP_VEC, /* optinfo_flags */
2285 TV_NONE, /* tv_id */
2286 PROP_cfg, /* properties_required */
2287 PROP_gimple_lvec, /* properties_provided */
2288 0, /* properties_destroyed */
2289 0, /* todo_flags_start */
2290 ( TODO_update_ssa
2291 | TODO_cleanup_cfg ), /* todo_flags_finish */
2294 class pass_lower_vector_ssa : public gimple_opt_pass
2296 public:
2297 pass_lower_vector_ssa (gcc::context *ctxt)
2298 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2301 /* opt_pass methods: */
2302 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2303 virtual unsigned int execute (function *)
2305 return expand_vector_operations ();
2308 }; // class pass_lower_vector_ssa
2310 } // anon namespace
2312 gimple_opt_pass *
2313 make_pass_lower_vector_ssa (gcc::context *ctxt)
2315 return new pass_lower_vector_ssa (ctxt);
2318 #include "gt-tree-vect-generic.h"