gcc/tree-vect-generic.c (official-gcc.git)
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "recog.h" /* FIXME: for insn_data */
47 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
49 /* Return the number of elements in a vector type TYPE that we have
50 already decided needs to be expanded piecewise. We don't support
51 this kind of expansion for variable-length vectors, since we should
52 always check for target support before introducing uses of those. */
53 static unsigned int
54 nunits_for_known_piecewise_op (const_tree type)
56 return TYPE_VECTOR_SUBPARTS (type).to_constant ();
59 /* Return true if TYPE1 has more elements than TYPE2, where either
60 type may be a vector or a scalar. */
62 static inline bool
63 subparts_gt (tree type1, tree type2)
65 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
66 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
67 return known_gt (n1, n2);
70 /* Build a constant of type TYPE, made of VALUE's bits replicated
71 every WIDTH bits to fit TYPE's precision. */
72 static tree
73 build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
75 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76 / HOST_BITS_PER_WIDE_INT;
77 unsigned HOST_WIDE_INT low, mask;
78 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79 int i;
81 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
83 if (width == HOST_BITS_PER_WIDE_INT)
84 low = value;
85 else
87 mask = ((HOST_WIDE_INT)1 << width) - 1;
88 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
91 for (i = 0; i < n; i++)
92 a[i] = low;
94 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95 return wide_int_to_tree
96 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
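/* For example, with a 32-bit TYPE, a WIDTH of 8 and a VALUE of 0x7f this
   builds the constant 0x7f7f7f7f: the low WIDTH bits of VALUE repeated
   across the whole precision of TYPE.  */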
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
103 /* Return a suitable vector type made of NUNITS units, each of mode
104 "word_mode" (the global variable). */
105 static tree
106 build_word_mode_vector_type (int nunits)
108 if (!vector_inner_type)
109 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110 else if (vector_last_nunits == nunits)
112 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113 return vector_last_type;
116 vector_last_nunits = nunits;
117 vector_last_type = build_vector_type (vector_inner_type, nunits);
118 return vector_last_type;
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 tree, tree, tree, tree, tree, enum tree_code,
123 tree);
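/* Extract the piece of T located at bit position BITPOS and of width BITSIZE,
   gimplified to a value of type TYPE.  If BITPOS is NULL, reinterpret the
   whole of T through a VIEW_CONVERT_EXPR instead.  VECTOR_CST and
   CONSTRUCTOR definitions are looked through so the BIT_FIELD_REF can be
   applied to them directly; for a boolean TYPE the extracted bits are
   compared against zero.  */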
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 tree t, tree bitsize, tree bitpos)
129 if (TREE_CODE (t) == SSA_NAME)
131 gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132 if (is_gimple_assign (def_stmt)
133 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 || (bitpos
135 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 t = gimple_assign_rhs1 (def_stmt);
138 if (bitpos)
140 if (TREE_CODE (type) == BOOLEAN_TYPE)
142 tree itype
143 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
144 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
145 bitsize, bitpos);
146 return gimplify_build2 (gsi, NE_EXPR, type, field,
147 build_zero_cst (itype));
149 else
150 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
152 else
153 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
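/* Callback for expand_vector_piecewise: extract one element of A at
   BITPOS/BITSIZE and apply the scalar unary operation CODE to it.  */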
156 static tree
157 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
158 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
159 enum tree_code code, tree type ATTRIBUTE_UNUSED)
161 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
162 return gimplify_build1 (gsi, code, inner_type, a);
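/* Callback for expand_vector_piecewise: extract the corresponding elements
   of A and B (when they are vectors) and apply the scalar binary operation
   CODE to them.  */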
165 static tree
166 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
167 tree bitpos, tree bitsize, enum tree_code code,
168 tree type ATTRIBUTE_UNUSED)
170 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
171 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
172 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
173 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
174 return gimplify_build2 (gsi, code, inner_type, a, b);
177 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
179 INNER_TYPE is the type of A's and B's elements.  The
181 returned expression is of a signed integer type with the
182 same size as INNER_TYPE. */
183 static tree
184 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
185 tree bitpos, tree bitsize, enum tree_code code, tree type)
187 tree stype = TREE_TYPE (type);
188 tree cst_false = build_zero_cst (stype);
189 tree cst_true = build_all_ones_cst (stype);
190 tree cmp;
192 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
193 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
195 cmp = build2 (code, boolean_type_node, a, b);
196 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
199 /* Expand vector addition to scalars. This does bit twiddling
200 in order to increase parallelism:
202 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
203 (a ^ b) & 0x80808080
205 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
206 (a ^ ~b) & 0x80808080
208 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
210 This optimization should be done only if 4 vector items or more
211 fit into a word. */
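/* As a concrete example, with four 8-bit elements packed into a 32-bit word,
   adding 0x05 and 0x0b in one lane gives
     ((0x05 & 0x7f) + (0x0b & 0x7f)) ^ ((0x05 ^ 0x0b) & 0x80) = 0x10;
   masking off the top bit of each lane keeps the partial sums from carrying
   into the neighbouring lane, and the final XOR restores the top bits.  */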
212 static tree
213 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
214 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
215 enum tree_code code, tree type ATTRIBUTE_UNUSED)
217 unsigned int width = vector_element_bits (TREE_TYPE (a));
218 tree inner_type = TREE_TYPE (TREE_TYPE (a));
219 unsigned HOST_WIDE_INT max;
220 tree low_bits, high_bits, a_low, b_low, result_low, signs;
222 max = GET_MODE_MASK (TYPE_MODE (inner_type));
223 low_bits = build_replicated_const (word_type, width, max >> 1);
224 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
226 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
227 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
229 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
230 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
231 if (code == PLUS_EXPR)
232 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
233 else
235 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
236 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
239 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
240 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
241 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
244 static tree
245 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
246 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
247 tree bitsize ATTRIBUTE_UNUSED,
248 enum tree_code code ATTRIBUTE_UNUSED,
249 tree type ATTRIBUTE_UNUSED)
251 unsigned int width = vector_element_bits (TREE_TYPE (b));
252 tree inner_type = TREE_TYPE (TREE_TYPE (b));
253 HOST_WIDE_INT max;
254 tree low_bits, high_bits, b_low, result_low, signs;
256 max = GET_MODE_MASK (TYPE_MODE (inner_type));
257 low_bits = build_replicated_const (word_type, width, max >> 1);
258 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
260 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
262 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
263 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
264 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
265 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
266 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
269 /* Expand a vector operation to scalars, by using many operations
270 whose type is the vector type's inner type. */
271 static tree
272 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
273 tree type, tree inner_type,
274 tree a, tree b, enum tree_code code,
275 tree ret_type = NULL_TREE)
277 vec<constructor_elt, va_gc> *v;
278 tree part_width = TYPE_SIZE (inner_type);
279 tree index = bitsize_int (0);
280 int nunits = nunits_for_known_piecewise_op (type);
281 int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
282 int i;
283 location_t loc = gimple_location (gsi_stmt (*gsi));
285 if (ret_type
286 || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
287 warning_at (loc, OPT_Wvector_operation_performance,
288 "vector operation will be expanded piecewise");
289 else
290 warning_at (loc, OPT_Wvector_operation_performance,
291 "vector operation will be expanded in parallel");
293 if (!ret_type)
294 ret_type = type;
295 vec_alloc (v, (nunits + delta - 1) / delta);
296 for (i = 0; i < nunits;
297 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
299 tree result = f (gsi, inner_type, a, b, index, part_width, code,
300 ret_type);
301 constructor_elt ce = {NULL_TREE, result};
302 v->quick_push (ce);
305 return build_constructor (ret_type, v);
308 /* Expand a vector operation to scalars with the freedom to use
309 a scalar integer type, or to use a different size for the items
310 in the vector type. */
311 static tree
312 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
313 tree a, tree b, enum tree_code code)
315 tree result, compute_type;
316 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
317 location_t loc = gimple_location (gsi_stmt (*gsi));
319 /* We have three strategies. If the type is already correct, just do
320 the operation an element at a time. Else, if the vector is wider than
321 one word, do it a word at a time; finally, if the vector is smaller
322 than one word, do it as a scalar. */
323 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
324 return expand_vector_piecewise (gsi, f,
325 type, TREE_TYPE (type),
326 a, b, code);
327 else if (n_words > 1)
329 tree word_type = build_word_mode_vector_type (n_words);
330 result = expand_vector_piecewise (gsi, f,
331 word_type, TREE_TYPE (word_type),
332 a, b, code);
333 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
334 GSI_SAME_STMT);
336 else
338 /* Use a single scalar operation with a mode no wider than word_mode. */
339 scalar_int_mode mode
340 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
341 compute_type = lang_hooks.types.type_for_mode (mode, 1);
342 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
343 warning_at (loc, OPT_Wvector_operation_performance,
344 "vector operation will be expanded with a "
345 "single scalar operation");
348 return result;
351 /* Expand a vector operation to scalars; for integer types we can use
352 special bit twiddling tricks to do the sums a word at a time, using
353 function F_PARALLEL instead of F. These tricks are done only if
354 they can process at least four items, that is, only if the vector
355 holds at least four items and if a word can hold four items. */
356 static tree
357 expand_vector_addition (gimple_stmt_iterator *gsi,
358 elem_op_func f, elem_op_func f_parallel,
359 tree type, tree a, tree b, enum tree_code code)
361 int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
363 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
364 && parts_per_word >= 4
365 && nunits_for_known_piecewise_op (type) >= 4)
366 return expand_vector_parallel (gsi, f_parallel,
367 type, a, b, code);
368 else
369 return expand_vector_piecewise (gsi, f,
370 type, TREE_TYPE (type),
371 a, b, code);
374 static bool
375 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
377 /* Try to expand the vector comparison expression OP0 CODE OP1 by
378 querying the optab to see whether the expression
379 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
380 can be expanded. */
381 static tree
382 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
383 tree op1, enum tree_code code,
384 bitmap dce_ssa_names)
386 tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
387 use_operand_p use_p;
388 imm_use_iterator iterator;
389 bool vec_cond_expr_only = true;
391 /* As seen in PR95830, we should not expand comparisons that are only
392 feeding a VEC_COND_EXPR statement. */
393 auto_vec<gimple *> uses;
394 FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
395 uses.safe_push (USE_STMT (use_p));
397 for (unsigned i = 0; i < uses.length (); i ++)
399 gassign *use = dyn_cast<gassign *> (uses[i]);
400 if (use != NULL
401 && gimple_assign_rhs_code (use) == VEC_COND_EXPR
402 && gimple_assign_rhs1 (use) == lhs)
404 gimple_stmt_iterator it = gsi_for_stmt (use);
405 if (!expand_vector_condition (&it, dce_ssa_names))
407 vec_cond_expr_only = false;
408 break;
411 else
413 vec_cond_expr_only = false;
414 break;
418 if (!uses.is_empty () && vec_cond_expr_only)
419 return NULL_TREE;
421 tree t;
422 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
424 if (VECTOR_BOOLEAN_TYPE_P (type)
425 && SCALAR_INT_MODE_P (TYPE_MODE (type))
426 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
427 TYPE_VECTOR_SUBPARTS (type)
428 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
429 (TREE_TYPE (type)))))
431 tree inner_type = TREE_TYPE (TREE_TYPE (op0));
432 tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
433 tree index = bitsize_int (0);
434 int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
435 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
436 tree ret_type = build_nonstandard_integer_type (prec, 1);
437 tree ret_inner_type = boolean_type_node;
438 int i;
439 location_t loc = gimple_location (gsi_stmt (*gsi));
440 t = build_zero_cst (ret_type);
442 if (TYPE_PRECISION (ret_inner_type) != 1)
443 ret_inner_type = build_nonstandard_integer_type (1, 1);
444 warning_at (loc, OPT_Wvector_operation_performance,
445 "vector operation will be expanded piecewise");
446 for (i = 0; i < nunits;
447 i++, index = int_const_binop (PLUS_EXPR, index, part_width))
449 tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
450 index);
451 tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
452 index);
453 tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
454 t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
455 bitsize_int (i));
457 t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
459 else
460 t = expand_vector_piecewise (gsi, do_compare, type,
461 TREE_TYPE (TREE_TYPE (op0)), op0, op1,
462 code);
464 else
465 t = NULL_TREE;
467 return t;
470 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
471 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
472 the result if successful, otherwise return NULL_TREE. */
473 static tree
474 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
476 optab op;
477 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
478 bool scalar_shift = true;
480 for (i = 1; i < nunits; i++)
482 if (shiftcnts[i] != shiftcnts[0])
483 scalar_shift = false;
486 if (scalar_shift && shiftcnts[0] == 0)
487 return op0;
489 if (scalar_shift)
491 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
492 if (op != unknown_optab
493 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
494 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
495 build_int_cst (NULL_TREE, shiftcnts[0]));
498 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
499 if (op != unknown_optab
500 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
502 tree_vector_builder vec (type, nunits, 1);
503 for (i = 0; i < nunits; i++)
504 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
505 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
508 return NULL_TREE;
511 /* Try to expand integer vector division by constant using
512 widening multiply, shifts and additions. */
513 static tree
514 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
515 tree op1, enum tree_code code)
517 bool use_pow2 = true;
518 bool has_vector_shift = true;
519 bool use_abs_op1 = false;
520 int mode = -1, this_mode;
521 int pre_shift = -1, post_shift;
522 unsigned int nunits = nunits_for_known_piecewise_op (type);
523 int *shifts = XALLOCAVEC (int, nunits * 4);
524 int *pre_shifts = shifts + nunits;
525 int *post_shifts = pre_shifts + nunits;
526 int *shift_temps = post_shifts + nunits;
527 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
528 int prec = TYPE_PRECISION (TREE_TYPE (type));
529 int dummy_int;
530 unsigned int i;
531 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
532 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
533 tree cur_op, mulcst, tem;
534 optab op;
536 if (prec > HOST_BITS_PER_WIDE_INT)
537 return NULL_TREE;
539 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
540 if (op == unknown_optab
541 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
542 has_vector_shift = false;
544 /* Analysis phase.  Determine whether all op1 elements are powers
545 of two, in which case the operation can be expanded using shifts (or,
546 for the remainder, using masking).  Additionally compute the multiplicative
547 constants and pre- and post-shifts if the division is to be expanded using
548 widening or high-part multiplication plus shifts. */
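/* For instance, unsigned division of 8-bit elements by 3 can be done as
   (x h* 0xab) >> 1, since (x * 0xab) >> 9 == x / 3 for every x < 256;
   choose_multiplier computes multiplier/shift pairs of this kind.  */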
549 for (i = 0; i < nunits; i++)
551 tree cst = VECTOR_CST_ELT (op1, i);
552 unsigned HOST_WIDE_INT ml;
554 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
555 return NULL_TREE;
556 pre_shifts[i] = 0;
557 post_shifts[i] = 0;
558 mulc[i] = 0;
559 if (use_pow2
560 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
561 use_pow2 = false;
562 if (use_pow2)
564 shifts[i] = tree_log2 (cst);
565 if (shifts[i] != shifts[0]
566 && code == TRUNC_DIV_EXPR
567 && !has_vector_shift)
568 use_pow2 = false;
570 if (mode == -2)
571 continue;
572 if (sign_p == UNSIGNED)
574 unsigned HOST_WIDE_INT mh;
575 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
577 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
578 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
579 return NULL_TREE;
581 if (d <= 1)
583 mode = -2;
584 continue;
587 /* Find a suitable multiplier and right shift count
588 with which to replace the division by D. */
589 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
591 /* If the suggested multiplier is more than SIZE bits, we can
592 do better for even divisors, using an initial right shift. */
593 if ((mh != 0 && (d & 1) == 0)
594 || (!has_vector_shift && pre_shift != -1))
596 if (has_vector_shift)
597 pre_shift = ctz_or_zero (d);
598 else if (pre_shift == -1)
600 unsigned int j;
601 for (j = 0; j < nunits; j++)
603 tree cst2 = VECTOR_CST_ELT (op1, j);
604 unsigned HOST_WIDE_INT d2;
605 int this_pre_shift;
607 if (!tree_fits_uhwi_p (cst2))
608 return NULL_TREE;
609 d2 = tree_to_uhwi (cst2) & mask;
610 if (d2 == 0)
611 return NULL_TREE;
612 this_pre_shift = floor_log2 (d2 & -d2);
613 if (pre_shift == -1 || this_pre_shift < pre_shift)
614 pre_shift = this_pre_shift;
616 if (i != 0 && pre_shift != 0)
618 /* Restart. */
619 i = -1U;
620 mode = -1;
621 continue;
624 if (pre_shift != 0)
626 if ((d >> pre_shift) <= 1)
628 mode = -2;
629 continue;
631 mh = choose_multiplier (d >> pre_shift, prec,
632 prec - pre_shift,
633 &ml, &post_shift, &dummy_int);
634 gcc_assert (!mh);
635 pre_shifts[i] = pre_shift;
638 if (!mh)
639 this_mode = 0;
640 else
641 this_mode = 1;
643 else
645 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
646 unsigned HOST_WIDE_INT abs_d;
648 if (d == -1)
649 return NULL_TREE;
651 /* Since d might be INT_MIN, we have to cast to
652 unsigned HOST_WIDE_INT before negating to avoid
653 undefined signed overflow. */
654 abs_d = (d >= 0
655 ? (unsigned HOST_WIDE_INT) d
656 : - (unsigned HOST_WIDE_INT) d);
658 /* n rem d = n rem -d */
659 if (code == TRUNC_MOD_EXPR && d < 0)
661 d = abs_d;
662 use_abs_op1 = true;
664 if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
666 /* This case is not handled correctly below. */
667 mode = -2;
668 continue;
670 if (abs_d <= 1)
672 mode = -2;
673 continue;
676 choose_multiplier (abs_d, prec, prec - 1, &ml,
677 &post_shift, &dummy_int);
678 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
680 this_mode = 4 + (d < 0);
681 ml |= HOST_WIDE_INT_M1U << (prec - 1);
683 else
684 this_mode = 2 + (d < 0);
686 mulc[i] = ml;
687 post_shifts[i] = post_shift;
688 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
689 || post_shift >= prec
690 || pre_shifts[i] >= prec)
691 this_mode = -2;
693 if (i == 0)
694 mode = this_mode;
695 else if (mode != this_mode)
696 mode = -2;
699 if (use_pow2)
701 tree addend = NULL_TREE;
702 if (sign_p == SIGNED)
704 tree uns_type;
706 /* Both division and remainder sequences need
707 op0 < 0 ? mask : 0 computed. It can be either computed as
708 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
709 if none of the shifts is 0, or as the conditional. */
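/* E.g. for a signed division by 4, the sequence is effectively
   q = (op0 + (op0 < 0 ? 3 : 0)) >> 2, and the "op0 < 0 ? mask : 0" value
   is the addend computed here.  */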
710 for (i = 0; i < nunits; i++)
711 if (shifts[i] == 0)
712 break;
713 uns_type
714 = build_vector_type (build_nonstandard_integer_type (prec, 1),
715 nunits);
716 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
718 for (i = 0; i < nunits; i++)
719 shift_temps[i] = prec - 1;
720 cur_op = add_rshift (gsi, type, op0, shift_temps);
721 if (cur_op != NULL_TREE)
723 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
724 uns_type, cur_op);
725 for (i = 0; i < nunits; i++)
726 shift_temps[i] = prec - shifts[i];
727 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
728 if (cur_op != NULL_TREE)
729 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
730 type, cur_op);
733 if (addend == NULL_TREE
734 && expand_vec_cond_expr_p (type, type, LT_EXPR))
736 tree zero, cst, mask_type, mask;
737 gimple *stmt, *cond;
739 mask_type = truth_type_for (type);
740 zero = build_zero_cst (type);
741 mask = make_ssa_name (mask_type);
742 cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
743 gsi_insert_before (gsi, cond, GSI_SAME_STMT);
744 tree_vector_builder vec (type, nunits, 1);
745 for (i = 0; i < nunits; i++)
746 vec.quick_push (build_int_cst (TREE_TYPE (type),
747 (HOST_WIDE_INT_1U
748 << shifts[i]) - 1));
749 cst = vec.build ();
750 addend = make_ssa_name (type);
751 stmt
752 = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
753 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
756 if (code == TRUNC_DIV_EXPR)
758 if (sign_p == UNSIGNED)
760 /* q = op0 >> shift; */
761 cur_op = add_rshift (gsi, type, op0, shifts);
762 if (cur_op != NULL_TREE)
763 return cur_op;
765 else if (addend != NULL_TREE)
767 /* t1 = op0 + addend;
768 q = t1 >> shift; */
769 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
770 if (op != unknown_optab
771 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
773 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
774 cur_op = add_rshift (gsi, type, cur_op, shifts);
775 if (cur_op != NULL_TREE)
776 return cur_op;
780 else
782 tree mask;
783 tree_vector_builder vec (type, nunits, 1);
784 for (i = 0; i < nunits; i++)
785 vec.quick_push (build_int_cst (TREE_TYPE (type),
786 (HOST_WIDE_INT_1U
787 << shifts[i]) - 1));
788 mask = vec.build ();
789 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
790 if (op != unknown_optab
791 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
793 if (sign_p == UNSIGNED)
794 /* r = op0 & mask; */
795 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
796 else if (addend != NULL_TREE)
798 /* t1 = op0 + addend;
799 t2 = t1 & mask;
800 r = t2 - addend; */
801 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
802 if (op != unknown_optab
803 && optab_handler (op, TYPE_MODE (type))
804 != CODE_FOR_nothing)
806 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
807 addend);
808 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
809 cur_op, mask);
810 op = optab_for_tree_code (MINUS_EXPR, type,
811 optab_default);
812 if (op != unknown_optab
813 && optab_handler (op, TYPE_MODE (type))
814 != CODE_FOR_nothing)
815 return gimplify_build2 (gsi, MINUS_EXPR, type,
816 cur_op, addend);
823 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
824 return NULL_TREE;
826 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
827 return NULL_TREE;
829 cur_op = op0;
831 switch (mode)
833 case 0:
834 gcc_assert (sign_p == UNSIGNED);
835 /* t1 = oprnd0 >> pre_shift;
836 t2 = t1 h* ml;
837 q = t2 >> post_shift; */
838 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
839 if (cur_op == NULL_TREE)
840 return NULL_TREE;
841 break;
842 case 1:
843 gcc_assert (sign_p == UNSIGNED);
844 for (i = 0; i < nunits; i++)
846 shift_temps[i] = 1;
847 post_shifts[i]--;
849 break;
850 case 2:
851 case 3:
852 case 4:
853 case 5:
854 gcc_assert (sign_p == SIGNED);
855 for (i = 0; i < nunits; i++)
856 shift_temps[i] = prec - 1;
857 break;
858 default:
859 return NULL_TREE;
862 tree_vector_builder vec (type, nunits, 1);
863 for (i = 0; i < nunits; i++)
864 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
865 mulcst = vec.build ();
867 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
869 switch (mode)
871 case 0:
872 /* t1 = oprnd0 >> pre_shift;
873 t2 = t1 h* ml;
874 q = t2 >> post_shift; */
875 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
876 break;
877 case 1:
878 /* t1 = oprnd0 h* ml;
879 t2 = oprnd0 - t1;
880 t3 = t2 >> 1;
881 t4 = t1 + t3;
882 q = t4 >> (post_shift - 1); */
883 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
884 if (op == unknown_optab
885 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
886 return NULL_TREE;
887 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
888 tem = add_rshift (gsi, type, tem, shift_temps);
889 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
890 if (op == unknown_optab
891 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
892 return NULL_TREE;
893 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
894 cur_op = add_rshift (gsi, type, tem, post_shifts);
895 if (cur_op == NULL_TREE)
896 return NULL_TREE;
897 break;
898 case 2:
899 case 3:
900 case 4:
901 case 5:
902 /* t1 = oprnd0 h* ml;
903 t2 = t1; [ iff (mode & 2) != 0 ]
904 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
905 t3 = t2 >> post_shift;
906 t4 = oprnd0 >> (prec - 1);
907 q = t3 - t4; [ iff (mode & 1) == 0 ]
908 q = t4 - t3; [ iff (mode & 1) != 0 ] */
909 if ((mode & 2) == 0)
911 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
912 if (op == unknown_optab
913 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
914 return NULL_TREE;
915 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
917 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
918 if (cur_op == NULL_TREE)
919 return NULL_TREE;
920 tem = add_rshift (gsi, type, op0, shift_temps);
921 if (tem == NULL_TREE)
922 return NULL_TREE;
923 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
924 if (op == unknown_optab
925 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
926 return NULL_TREE;
927 if ((mode & 1) == 0)
928 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
929 else
930 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
931 break;
932 default:
933 gcc_unreachable ();
936 if (code == TRUNC_DIV_EXPR)
937 return cur_op;
939 /* We divided. Now finish by:
940 t1 = q * oprnd1;
941 r = oprnd0 - t1; */
942 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
943 if (op == unknown_optab
944 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
945 return NULL_TREE;
946 if (use_abs_op1)
948 tree_vector_builder elts;
949 if (!elts.new_unary_operation (type, op1, false))
950 return NULL_TREE;
951 unsigned int count = elts.encoded_nelts ();
952 for (unsigned int i = 0; i < count; ++i)
954 tree elem1 = VECTOR_CST_ELT (op1, i);
956 tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
957 if (elt == NULL_TREE)
958 return NULL_TREE;
959 elts.quick_push (elt);
961 op1 = elts.build ();
963 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
964 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
965 if (op == unknown_optab
966 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
967 return NULL_TREE;
968 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
971 /* Expand a vector condition to scalars, by using many conditions
972 on the vector's elements. */
974 static bool
975 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
977 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
978 tree type = gimple_expr_type (stmt);
979 tree a = gimple_assign_rhs1 (stmt);
980 tree a1 = a;
981 tree a2 = NULL_TREE;
982 bool a_is_comparison = false;
983 bool a_is_scalar_bitmask = false;
984 tree b = gimple_assign_rhs2 (stmt);
985 tree c = gimple_assign_rhs3 (stmt);
986 vec<constructor_elt, va_gc> *v;
987 tree constr;
988 tree inner_type = TREE_TYPE (type);
989 tree width = vector_element_bits_tree (type);
990 tree cond_type = TREE_TYPE (TREE_TYPE (a));
991 tree comp_inner_type = cond_type;
992 tree index = bitsize_int (0);
993 tree comp_width = width;
994 tree comp_index = index;
995 location_t loc = gimple_location (gsi_stmt (*gsi));
996 tree_code code = TREE_CODE (a);
997 gassign *assign = NULL;
999 if (code == SSA_NAME)
1001 assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
1002 if (assign != NULL
1003 && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
1005 a_is_comparison = true;
1006 a1 = gimple_assign_rhs1 (assign);
1007 a2 = gimple_assign_rhs2 (assign);
1008 code = gimple_assign_rhs_code (assign);
1009 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
1010 comp_width = vector_element_bits_tree (TREE_TYPE (a1));
1014 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
1016 gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
1017 return true;
1020 /* Handle vector boolean types with bitmasks. If there is a comparison
1021 and we can expand the comparison into the vector boolean bitmask,
1022 or otherwise if it is compatible with type, we can transform
1023 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
1024 into
1025 tmp_6 = x_2 < y_3;
1026 tmp_7 = tmp_6 & vbfld_4;
1027 tmp_8 = ~tmp_6;
1028 tmp_9 = tmp_8 & vbfld_5;
1029 vbfld_1 = tmp_7 | tmp_9;
1030 Similarly for vbfld_10 instead of x_2 < y_3. */
1031 if (VECTOR_BOOLEAN_TYPE_P (type)
1032 && SCALAR_INT_MODE_P (TYPE_MODE (type))
1033 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
1034 TYPE_VECTOR_SUBPARTS (type)
1035 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
1036 && (a_is_comparison
1037 ? useless_type_conversion_p (type, TREE_TYPE (a))
1038 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1040 if (a_is_comparison)
1041 a = gimplify_build2 (gsi, code, type, a1, a2);
1042 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1043 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1044 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1045 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1046 gimple_assign_set_rhs_from_tree (gsi, a);
1047 update_stmt (gsi_stmt (*gsi));
1048 return true;
1051 /* TODO: try and find a smaller vector type. */
1053 warning_at (loc, OPT_Wvector_operation_performance,
1054 "vector condition will be expanded piecewise");
1056 if (!a_is_comparison
1057 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1058 && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1059 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1060 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1061 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1062 (TREE_TYPE (TREE_TYPE (a))))))
1064 a_is_scalar_bitmask = true;
1065 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1066 tree atype = build_nonstandard_integer_type (prec, 1);
1067 a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1070 int nunits = nunits_for_known_piecewise_op (type);
1071 vec_alloc (v, nunits);
1072 for (int i = 0; i < nunits; i++)
1074 tree aa, result;
1075 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1076 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1077 if (a_is_comparison)
1079 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1080 comp_width, comp_index);
1081 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1082 comp_width, comp_index);
1083 aa = fold_build2 (code, cond_type, aa1, aa2);
1085 else if (a_is_scalar_bitmask)
1087 wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1088 result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1089 a, wide_int_to_tree (TREE_TYPE (a), w));
1090 aa = fold_build2 (NE_EXPR, boolean_type_node, result,
1091 build_zero_cst (TREE_TYPE (a)));
1093 else
1094 aa = tree_vec_extract (gsi, cond_type, a, width, index);
1095 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1096 constructor_elt ce = {NULL_TREE, result};
1097 v->quick_push (ce);
1098 index = int_const_binop (PLUS_EXPR, index, width);
1099 if (width == comp_width)
1100 comp_index = index;
1101 else
1102 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1105 constr = build_constructor (type, v);
1106 gimple_assign_set_rhs_from_tree (gsi, constr);
1107 update_stmt (gsi_stmt (*gsi));
1109 if (a_is_comparison)
1110 bitmap_set_bit (dce_ssa_names,
1111 SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1113 return false;
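/* Expand the vector operation performed by ASSIGN, whose result has vector
   type TYPE, in terms of operations on COMPUTE_TYPE.  Return the expression
   that should replace the right-hand side of the statement.  */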
1116 static tree
1117 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1118 gassign *assign, enum tree_code code,
1119 bitmap dce_ssa_names)
1121 machine_mode compute_mode = TYPE_MODE (compute_type);
1123 /* If the compute mode is not a vector mode (hence we are not decomposing
1124 a BLKmode vector to smaller, hardware-supported vectors), we may want
1125 to expand the operations in parallel. */
1126 if (!VECTOR_MODE_P (compute_mode))
1127 switch (code)
1129 case PLUS_EXPR:
1130 case MINUS_EXPR:
1131 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1132 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1133 gimple_assign_rhs1 (assign),
1134 gimple_assign_rhs2 (assign), code);
1135 break;
1137 case NEGATE_EXPR:
1138 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1139 return expand_vector_addition (gsi, do_unop, do_negate, type,
1140 gimple_assign_rhs1 (assign),
1141 NULL_TREE, code);
1142 break;
1144 case BIT_AND_EXPR:
1145 case BIT_IOR_EXPR:
1146 case BIT_XOR_EXPR:
1147 return expand_vector_parallel (gsi, do_binop, type,
1148 gimple_assign_rhs1 (assign),
1149 gimple_assign_rhs2 (assign), code);
1151 case BIT_NOT_EXPR:
1152 return expand_vector_parallel (gsi, do_unop, type,
1153 gimple_assign_rhs1 (assign),
1154 NULL_TREE, code);
1155 case EQ_EXPR:
1156 case NE_EXPR:
1157 case GT_EXPR:
1158 case LT_EXPR:
1159 case GE_EXPR:
1160 case LE_EXPR:
1161 case UNEQ_EXPR:
1162 case UNGT_EXPR:
1163 case UNLT_EXPR:
1164 case UNGE_EXPR:
1165 case UNLE_EXPR:
1166 case LTGT_EXPR:
1167 case ORDERED_EXPR:
1168 case UNORDERED_EXPR:
1170 tree rhs1 = gimple_assign_rhs1 (assign);
1171 tree rhs2 = gimple_assign_rhs2 (assign);
1173 return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
1174 dce_ssa_names);
1177 case TRUNC_DIV_EXPR:
1178 case TRUNC_MOD_EXPR:
1180 tree rhs1 = gimple_assign_rhs1 (assign);
1181 tree rhs2 = gimple_assign_rhs2 (assign);
1182 tree ret;
1184 if (!optimize
1185 || !VECTOR_INTEGER_TYPE_P (type)
1186 || TREE_CODE (rhs2) != VECTOR_CST
1187 || !VECTOR_MODE_P (TYPE_MODE (type)))
1188 break;
1190 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1191 if (ret != NULL_TREE)
1192 return ret;
1193 break;
1196 default:
1197 break;
1200 if (TREE_CODE_CLASS (code) == tcc_unary)
1201 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1202 gimple_assign_rhs1 (assign),
1203 NULL_TREE, code);
1204 else
1205 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1206 gimple_assign_rhs1 (assign),
1207 gimple_assign_rhs2 (assign), code);
1210 /* Try to optimize
1211 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1212 style stmts into:
1213 _9 = { b_7, b_7, b_7, b_7 };
1214 a_5 = _9 + { 0, 3, 6, 9 };
1215 because vector splat operation is usually more efficient
1216 than piecewise initialization of the vector. */
1218 static void
1219 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1221 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1222 tree lhs = gimple_assign_lhs (stmt);
1223 tree rhs = gimple_assign_rhs1 (stmt);
1224 tree type = TREE_TYPE (rhs);
1225 unsigned int i, j;
1226 unsigned HOST_WIDE_INT nelts;
1227 bool all_same = true;
1228 constructor_elt *elt;
1229 gimple *g;
1230 tree base = NULL_TREE;
1231 optab op;
1233 if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1234 || nelts <= 2
1235 || CONSTRUCTOR_NELTS (rhs) != nelts)
1236 return;
1237 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1238 if (op == unknown_optab
1239 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1240 return;
1241 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1242 if (TREE_CODE (elt->value) != SSA_NAME
1243 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1244 return;
1245 else
1247 tree this_base = elt->value;
1248 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1249 all_same = false;
1250 for (j = 0; j < nelts + 1; j++)
1252 g = SSA_NAME_DEF_STMT (this_base);
1253 if (is_gimple_assign (g)
1254 && gimple_assign_rhs_code (g) == PLUS_EXPR
1255 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1256 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1257 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1258 this_base = gimple_assign_rhs1 (g);
1259 else
1260 break;
1262 if (i == 0)
1263 base = this_base;
1264 else if (this_base != base)
1265 return;
1267 if (all_same)
1268 return;
1269 tree_vector_builder cst (type, nelts, 1);
1270 for (i = 0; i < nelts; i++)
1272 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1273 tree elt = build_zero_cst (TREE_TYPE (base));
1274 while (this_base != base)
1276 g = SSA_NAME_DEF_STMT (this_base);
1277 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1278 elt, gimple_assign_rhs2 (g));
1279 if (elt == NULL_TREE
1280 || TREE_CODE (elt) != INTEGER_CST
1281 || TREE_OVERFLOW (elt))
1282 return;
1283 this_base = gimple_assign_rhs1 (g);
1285 cst.quick_push (elt);
1287 for (i = 0; i < nelts; i++)
1288 CONSTRUCTOR_ELT (rhs, i)->value = base;
1289 g = gimple_build_assign (make_ssa_name (type), rhs);
1290 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1291 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1292 cst.build ());
1293 gsi_replace (gsi, g, false);
1296 /* Return a type for the widest vector mode whose components are of type
1297 TYPE, or NULL_TREE if none is found. */
1299 static tree
1300 type_for_widest_vector_mode (tree type, optab op)
1302 machine_mode inner_mode = TYPE_MODE (type);
1303 machine_mode best_mode = VOIDmode, mode;
1304 poly_int64 best_nunits = 0;
1306 if (SCALAR_FLOAT_MODE_P (inner_mode))
1307 mode = MIN_MODE_VECTOR_FLOAT;
1308 else if (SCALAR_FRACT_MODE_P (inner_mode))
1309 mode = MIN_MODE_VECTOR_FRACT;
1310 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1311 mode = MIN_MODE_VECTOR_UFRACT;
1312 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1313 mode = MIN_MODE_VECTOR_ACCUM;
1314 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1315 mode = MIN_MODE_VECTOR_UACCUM;
1316 else if (inner_mode == BImode)
1317 mode = MIN_MODE_VECTOR_BOOL;
1318 else
1319 mode = MIN_MODE_VECTOR_INT;
1321 FOR_EACH_MODE_FROM (mode, mode)
1322 if (GET_MODE_INNER (mode) == inner_mode
1323 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1324 && optab_handler (op, mode) != CODE_FOR_nothing)
1325 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1327 if (best_mode == VOIDmode)
1328 return NULL_TREE;
1329 else
1330 return build_vector_type_for_mode (type, best_mode);
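/* For example, if TYPE is a 64-element char vector type and the widest mode
   for which OP is supported only holds 16 chars, the 16-element vector type
   is returned and the callers split the operation into pieces of that
   size.  */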
1334 /* Build a reference to the element of the vector VECT.  The function
1335 returns either the element itself, a BIT_FIELD_REF, or an
1336 ARRAY_REF expression.
1338 GSI is required to insert temporary variables while building a
1339 reference to the element of the vector VECT.
1341 PTMPVEC is a pointer to a temporary variable used for caching
1342 purposes.  If PTMPVEC is NULL, a new temporary variable
1343 will be created. */
1344 static tree
1345 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1347 tree vect_type, vect_elt_type;
1348 gimple *asgn;
1349 tree tmpvec;
1350 tree arraytype;
1351 bool need_asgn = true;
1352 unsigned int elements;
1354 vect_type = TREE_TYPE (vect);
1355 vect_elt_type = TREE_TYPE (vect_type);
1356 elements = nunits_for_known_piecewise_op (vect_type);
1358 if (TREE_CODE (idx) == INTEGER_CST)
1360 unsigned HOST_WIDE_INT index;
1362 /* Given that we're about to compute a binary modulus,
1363 we don't care about the high bits of the value. */
1364 index = TREE_INT_CST_LOW (idx);
1365 if (!tree_fits_uhwi_p (idx) || index >= elements)
1367 index &= elements - 1;
1368 idx = build_int_cst (TREE_TYPE (idx), index);
1371 /* When lowering a vector statement sequence do some easy
1372 simplification by looking through intermediate vector results. */
1373 if (TREE_CODE (vect) == SSA_NAME)
1375 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1376 if (is_gimple_assign (def_stmt)
1377 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1378 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1379 vect = gimple_assign_rhs1 (def_stmt);
1382 if (TREE_CODE (vect) == VECTOR_CST)
1383 return VECTOR_CST_ELT (vect, index);
1384 else if (TREE_CODE (vect) == CONSTRUCTOR
1385 && (CONSTRUCTOR_NELTS (vect) == 0
1386 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1387 != VECTOR_TYPE))
1389 if (index < CONSTRUCTOR_NELTS (vect))
1390 return CONSTRUCTOR_ELT (vect, index)->value;
1391 return build_zero_cst (vect_elt_type);
1393 else
1395 tree size = vector_element_bits_tree (vect_type);
1396 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1397 size);
1398 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1402 if (!ptmpvec)
1403 tmpvec = create_tmp_var (vect_type, "vectmp");
1404 else if (!*ptmpvec)
1405 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1406 else
1408 tmpvec = *ptmpvec;
1409 need_asgn = false;
1412 if (need_asgn)
1414 TREE_ADDRESSABLE (tmpvec) = 1;
1415 asgn = gimple_build_assign (tmpvec, vect);
1416 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1419 arraytype = build_array_type_nelts (vect_elt_type, elements);
1420 return build4 (ARRAY_REF, vect_elt_type,
1421 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1422 idx, NULL_TREE, NULL_TREE);
1425 /* Check if VEC_PERM_EXPR within the given setting is supported
1426 by hardware, or lower it piecewise.
1428 When VEC_PERM_EXPR has the same first and second operands:
1429 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1430 {v0[mask[0]], v0[mask[1]], ...}
1431 MASK and V0 must have the same number of elements.
1433 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1434 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1435 V0 and V1 must have the same type.  MASK, V0 and V1 must have the
1436 same number of elements. */
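/* For example, VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,5,2,7}> yields
   {a,f,c,h}: indices smaller than the element count select from V0,
   larger ones select from V1.  */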
1438 static void
1439 lower_vec_perm (gimple_stmt_iterator *gsi)
1441 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1442 tree mask = gimple_assign_rhs3 (stmt);
1443 tree vec0 = gimple_assign_rhs1 (stmt);
1444 tree vec1 = gimple_assign_rhs2 (stmt);
1445 tree vect_type = TREE_TYPE (vec0);
1446 tree mask_type = TREE_TYPE (mask);
1447 tree vect_elt_type = TREE_TYPE (vect_type);
1448 tree mask_elt_type = TREE_TYPE (mask_type);
1449 unsigned HOST_WIDE_INT elements;
1450 vec<constructor_elt, va_gc> *v;
1451 tree constr, t, si, i_val;
1452 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1453 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1454 location_t loc = gimple_location (gsi_stmt (*gsi));
1455 unsigned i;
1457 if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1458 return;
1460 if (TREE_CODE (mask) == SSA_NAME)
1462 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1463 if (is_gimple_assign (def_stmt)
1464 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1465 mask = gimple_assign_rhs1 (def_stmt);
1468 vec_perm_builder sel_int;
1470 if (TREE_CODE (mask) == VECTOR_CST
1471 && tree_to_vec_perm_builder (&sel_int, mask))
1473 vec_perm_indices indices (sel_int, 2, elements);
1474 if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1476 gimple_assign_set_rhs3 (stmt, mask);
1477 update_stmt (stmt);
1478 return;
1480 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1481 vector as VEC1 and a right element shift MASK. */
1482 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1483 != CODE_FOR_nothing
1484 && TREE_CODE (vec1) == VECTOR_CST
1485 && initializer_zerop (vec1)
1486 && maybe_ne (indices[0], 0)
1487 && known_lt (poly_uint64 (indices[0]), elements))
1489 bool ok_p = indices.series_p (0, 1, indices[0], 1);
1490 if (!ok_p)
1492 for (i = 1; i < elements; ++i)
1494 poly_uint64 actual = indices[i];
1495 poly_uint64 expected = i + indices[0];
1496 /* Indices into the second vector are all equivalent. */
1497 if (maybe_lt (actual, elements)
1498 ? maybe_ne (actual, expected)
1499 : maybe_lt (expected, elements))
1500 break;
1502 ok_p = i == elements;
1504 if (ok_p)
1506 gimple_assign_set_rhs3 (stmt, mask);
1507 update_stmt (stmt);
1508 return;
1511 /* And similarly vec_shl pattern. */
1512 if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1513 != CODE_FOR_nothing
1514 && TREE_CODE (vec0) == VECTOR_CST
1515 && initializer_zerop (vec0))
1517 unsigned int first = 0;
1518 for (i = 0; i < elements; ++i)
1519 if (known_eq (poly_uint64 (indices[i]), elements))
1521 if (i == 0 || first)
1522 break;
1523 first = i;
1525 else if (first
1526 ? maybe_ne (poly_uint64 (indices[i]),
1527 elements + i - first)
1528 : maybe_ge (poly_uint64 (indices[i]), elements))
1529 break;
1530 if (i == elements)
1532 gimple_assign_set_rhs3 (stmt, mask);
1533 update_stmt (stmt);
1534 return;
1538 else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1539 return;
1541 warning_at (loc, OPT_Wvector_operation_performance,
1542 "vector shuffling operation will be expanded piecewise");
1544 vec_alloc (v, elements);
1545 for (i = 0; i < elements; i++)
1547 si = size_int (i);
1548 i_val = vector_element (gsi, mask, si, &masktmp);
1550 if (TREE_CODE (i_val) == INTEGER_CST)
1552 unsigned HOST_WIDE_INT index;
1554 index = TREE_INT_CST_LOW (i_val);
1555 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1556 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1558 if (two_operand_p && (index & elements) != 0)
1559 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1560 else
1561 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1563 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1564 true, GSI_SAME_STMT);
1566 else
1568 tree cond = NULL_TREE, v0_val;
1570 if (two_operand_p)
1572 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1573 build_int_cst (mask_elt_type, elements));
1574 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1575 true, GSI_SAME_STMT);
1578 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1579 build_int_cst (mask_elt_type, elements - 1));
1580 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1581 true, GSI_SAME_STMT);
1583 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1584 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1585 true, GSI_SAME_STMT);
1587 if (two_operand_p)
1589 tree v1_val;
1591 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1592 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1593 true, GSI_SAME_STMT);
1595 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1596 cond, build_zero_cst (mask_elt_type));
1597 cond = fold_build3 (COND_EXPR, vect_elt_type,
1598 cond, v0_val, v1_val);
1599 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1600 true, GSI_SAME_STMT);
1602 else
1603 t = v0_val;
1606 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1609 constr = build_constructor (vect_type, v);
1610 gimple_assign_set_rhs_from_tree (gsi, constr);
1611 update_stmt (gsi_stmt (*gsi));
1614 /* If OP is a uniform vector return the element it is a splat from. */
1616 static tree
1617 ssa_uniform_vector_p (tree op)
1619 if (TREE_CODE (op) == VECTOR_CST
1620 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1621 || TREE_CODE (op) == CONSTRUCTOR)
1622 return uniform_vector_p (op);
1623 if (TREE_CODE (op) == SSA_NAME)
1625 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1626 if (gimple_assign_single_p (def_stmt))
1627 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1629 return NULL_TREE;
1632 /* Return the type in which the operation CODE with optab OP can be
1633 computed. */
1635 static tree
1636 get_compute_type (enum tree_code code, optab op, tree type)
1638 /* For very wide vectors, try using a smaller vector mode. */
1639 tree compute_type = type;
1640 if (op
1641 && (!VECTOR_MODE_P (TYPE_MODE (type))
1642 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1644 tree vector_compute_type
1645 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1646 if (vector_compute_type != NULL_TREE
1647 && subparts_gt (compute_type, vector_compute_type)
1648 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1649 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1650 != CODE_FOR_nothing))
1651 compute_type = vector_compute_type;
1654 /* If we are breaking a BLKmode vector into smaller pieces,
1655 type_for_widest_vector_mode has already looked into the optab,
1656 so skip these checks. */
1657 if (compute_type == type)
1659 machine_mode compute_mode = TYPE_MODE (compute_type);
1660 if (VECTOR_MODE_P (compute_mode))
1662 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1663 return compute_type;
1664 if (code == MULT_HIGHPART_EXPR
1665 && can_mult_highpart_p (compute_mode,
1666 TYPE_UNSIGNED (compute_type)))
1667 return compute_type;
1669 /* There is no operation in hardware, so fall back to scalars. */
1670 compute_type = TREE_TYPE (type);
1673 return compute_type;
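/* Callback for expand_vector_piecewise for a vector COND_EXPR: extract one
   element from each of the value operands A and B and build a scalar
   COND_EXPR that reuses the (scalar) condition of the statement at GSI
   unchanged for every element.  */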
1676 static tree
1677 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1678 tree bitpos, tree bitsize, enum tree_code code,
1679 tree type ATTRIBUTE_UNUSED)
1681 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1682 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1683 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1684 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1685 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1686 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1689 /* Expand a vector COND_EXPR to scalars, piecewise. */
1690 static void
1691 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1693 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1694 tree type = gimple_expr_type (stmt);
1695 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1696 machine_mode compute_mode = TYPE_MODE (compute_type);
1697 gcc_assert (compute_mode != BLKmode);
1698 tree lhs = gimple_assign_lhs (stmt);
1699 tree rhs2 = gimple_assign_rhs2 (stmt);
1700 tree rhs3 = gimple_assign_rhs3 (stmt);
1701 tree new_rhs;
1703 /* If the compute mode is not a vector mode (hence we are not decomposing
1704 a BLKmode vector to smaller, hardware-supported vectors), we may want
1705 to expand the operations in parallel. */
1706 if (!VECTOR_MODE_P (compute_mode))
1707 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1708 COND_EXPR);
1709 else
1710 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1711 rhs2, rhs3, COND_EXPR);
1712 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1713 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1714 new_rhs);
1716 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1717 way to do it is change expand_vector_operation and its callees to
1718 return a tree_code, RHS1 and RHS2 instead of a tree. */
1719 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1720 update_stmt (gsi_stmt (*gsi));
1723 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1724 lowering. If INNER_TYPE is not a vector type, this is a scalar
1725 fallback. */
1727 static tree
1728 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1729 tree decl, tree bitpos, tree bitsize,
1730 enum tree_code code, tree type)
1732 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1733 if (!VECTOR_TYPE_P (inner_type))
1734 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1735 if (code == CALL_EXPR)
1737 gimple *g = gimple_build_call (decl, 1, a);
1738 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1739 gimple_call_set_lhs (g, lhs);
1740 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1741 return lhs;
1743 else
1745 tree outer_type = build_vector_type (TREE_TYPE (type),
1746 TYPE_VECTOR_SUBPARTS (inner_type));
1747 return gimplify_build1 (gsi, code, outer_type, a);
1751 /* Similarly, but for narrowing conversion. */
1753 static tree
1754 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1755 tree, tree bitpos, tree, enum tree_code code,
1756 tree type)
1758 tree itype = build_vector_type (TREE_TYPE (inner_type),
1759 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1760 2));
1761 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1762 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1763 int_const_binop (PLUS_EXPR, bitpos,
1764 TYPE_SIZE (itype)));
1765 tree outer_type = build_vector_type (TREE_TYPE (type),
1766 TYPE_VECTOR_SUBPARTS (inner_type));
1767 return gimplify_build2 (gsi, code, outer_type, b, c);
1770 /* Expand VEC_CONVERT ifn call. */
1772 static void
1773 expand_vector_conversion (gimple_stmt_iterator *gsi)
1775 gimple *stmt = gsi_stmt (*gsi);
1776 gimple *g;
1777 tree lhs = gimple_call_lhs (stmt);
1778 if (lhs == NULL_TREE)
1780 g = gimple_build_nop ();
1781 gsi_replace (gsi, g, false);
1782 return;
1784 tree arg = gimple_call_arg (stmt, 0);
1785 tree ret_type = TREE_TYPE (lhs);
1786 tree arg_type = TREE_TYPE (arg);
1787 tree new_rhs, compute_type = TREE_TYPE (arg_type);
1788 enum tree_code code = NOP_EXPR;
1789 enum tree_code code1 = ERROR_MARK;
1790 enum { NARROW, NONE, WIDEN } modifier = NONE;
1791 optab optab1 = unknown_optab;
1793 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1794 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1795 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1796 code = FIX_TRUNC_EXPR;
1797 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1798 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1799 code = FLOAT_EXPR;
1800 unsigned int ret_elt_bits = vector_element_bits (ret_type);
1801 unsigned int arg_elt_bits = vector_element_bits (arg_type);
1802 if (ret_elt_bits < arg_elt_bits)
1803 modifier = NARROW;
1804 else if (ret_elt_bits > arg_elt_bits)
1805 modifier = WIDEN;
1807 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1809 if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1811 g = gimple_build_assign (lhs, code1, arg);
1812 gsi_replace (gsi, g, false);
1813 return;
1815 /* Can't use get_compute_type here, as supportable_convert_operation
1816 doesn't necessarily use an optab and needs two arguments. */
1817 tree vec_compute_type
1818 = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
1819 if (vec_compute_type
1820 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
1821 && subparts_gt (arg_type, vec_compute_type))
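/* The conversion isn't supported for the full vector; halve the
element count until a supported conversion is found, then expand
the operation piecewise with vectors of that size. */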
1823 unsigned HOST_WIDE_INT nelts
1824 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1825 while (nelts > 1)
1827 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1828 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1829 if (supportable_convert_operation (code, ret1_type, arg1_type,
1830 &code1))
1832 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1833 ret_type, arg1_type, arg,
1834 NULL_TREE, code1);
1835 g = gimple_build_assign (lhs, new_rhs);
1836 gsi_replace (gsi, g, false);
1837 return;
1839 nelts = nelts / 2;
1843 else if (modifier == NARROW)
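/* Narrowing conversion: each VEC_PACK_* operation takes two input
vectors and produces one vector with twice as many elements of
half the width. */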
1845 switch (code)
1847 CASE_CONVERT:
1848 code1 = VEC_PACK_TRUNC_EXPR;
1849 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1850 break;
1851 case FIX_TRUNC_EXPR:
1852 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1853 /* The signedness is determined from the output operand. */
1854 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1855 break;
1856 case FLOAT_EXPR:
1857 code1 = VEC_PACK_FLOAT_EXPR;
1858 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1859 break;
1860 default:
1861 gcc_unreachable ();
1864 if (optab1)
1865 compute_type = get_compute_type (code1, optab1, arg_type);
1866 enum insn_code icode1;
1867 if (VECTOR_TYPE_P (compute_type)
1868 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1869 != CODE_FOR_nothing)
1870 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1872 tree cretd_type
1873 = build_vector_type (TREE_TYPE (ret_type),
1874 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1875 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
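/* If ARG_TYPE itself is the chosen compute type, pack ARG against a
zero vector and keep only the low RET_TYPE-sized part of the
result; otherwise work on DCOMPUTE_TYPE-sized chunks below. */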
1877 if (compute_type == arg_type)
1879 new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1880 arg, build_zero_cst (arg_type));
1881 new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1882 TYPE_SIZE (ret_type),
1883 bitsize_int (0));
1884 g = gimple_build_assign (lhs, new_rhs);
1885 gsi_replace (gsi, g, false);
1886 return;
1888 tree dcompute_type
1889 = build_vector_type (TREE_TYPE (compute_type),
1890 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
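/* DCOMPUTE_TYPE spans two COMPUTE_TYPE vectors, i.e. the amount of
input one VEC_PACK_* step consumes; if that covers the whole
argument a single step suffices, otherwise expand piecewise. */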
1891 if (TYPE_MAIN_VARIANT (dcompute_type)
1892 == TYPE_MAIN_VARIANT (arg_type))
1893 new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1894 NULL_TREE, bitsize_int (0),
1895 NULL_TREE, code1,
1896 ret_type);
1897 else
1898 new_rhs = expand_vector_piecewise (gsi,
1899 do_vec_narrow_conversion,
1900 arg_type, dcompute_type,
1901 arg, NULL_TREE, code1,
1902 ret_type);
1903 g = gimple_build_assign (lhs, new_rhs);
1904 gsi_replace (gsi, g, false);
1905 return;
1909 else if (modifier == WIDEN)
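/* Widening conversion: a VEC_UNPACK_*_LO/HI pair converts the two
halves of an input vector, so two operations per input vector
yield the full widened result. */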
1911 enum tree_code code2 = ERROR_MARK;
1912 optab optab2 = unknown_optab;
1913 switch (code)
1915 CASE_CONVERT:
1916 code1 = VEC_UNPACK_LO_EXPR;
1917 code2 = VEC_UNPACK_HI_EXPR;
1918 break;
1919 case FIX_TRUNC_EXPR:
1920 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1921 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1922 break;
1923 case FLOAT_EXPR:
1924 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1925 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1926 break;
1927 default:
1928 gcc_unreachable ();
1930 if (BYTES_BIG_ENDIAN)
1931 std::swap (code1, code2);
1933 if (code == FIX_TRUNC_EXPR)
1935 /* The signedness is determined from the output operand. */
1936 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1937 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
1939 else
1941 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1942 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
1945 if (optab1 && optab2)
1946 compute_type = get_compute_type (code1, optab1, arg_type);
1948 enum insn_code icode1, icode2;
1949 if (VECTOR_TYPE_P (compute_type)
1950 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1951 != CODE_FOR_nothing)
1952 && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
1953 != CODE_FOR_nothing)
1954 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
1955 && (insn_data[icode1].operand[0].mode
1956 == insn_data[icode2].operand[0].mode))
1958 poly_uint64 nunits
1959 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
1960 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
1961 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
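/* Convert each COMPUTE_TYPE-sized chunk of ARG with the LO and HI
unpack codes and gather the partial results in a CONSTRUCTOR of
the return type. */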
1963 vec<constructor_elt, va_gc> *v;
1964 tree part_width = TYPE_SIZE (compute_type);
1965 tree index = bitsize_int (0);
1966 int nunits = nunits_for_known_piecewise_op (arg_type);
1967 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
1968 int i;
1969 location_t loc = gimple_location (gsi_stmt (*gsi));
1971 if (compute_type != arg_type)
1972 warning_at (loc, OPT_Wvector_operation_performance,
1973 "vector operation will be expanded piecewise");
1974 else
1976 nunits = 1;
1977 delta = 1;
1980 vec_alloc (v, (nunits + delta - 1) / delta * 2);
1981 for (i = 0; i < nunits;
1982 i += delta, index = int_const_binop (PLUS_EXPR, index,
1983 part_width))
1985 tree a = arg;
1986 if (compute_type != arg_type)
1987 a = tree_vec_extract (gsi, compute_type, a, part_width,
1988 index);
1989 tree result = gimplify_build1 (gsi, code1, cretd_type, a);
1990 constructor_elt ce = { NULL_TREE, result };
1991 v->quick_push (ce);
1992 ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
1993 v->quick_push (ce);
1996 new_rhs = build_constructor (ret_type, v);
1997 g = gimple_build_assign (lhs, new_rhs);
1998 gsi_replace (gsi, g, false);
1999 return;
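/* None of the vector forms applied; fall back to converting one
element at a time through do_vec_conversion's scalar path. */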
2004 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
2005 TREE_TYPE (arg_type), arg,
2006 NULL_TREE, code, ret_type);
2007 g = gimple_build_assign (lhs, new_rhs);
2008 gsi_replace (gsi, g, false);
2011 /* Process one statement. If we identify a vector operation, expand it. */
2013 static void
2014 expand_vector_operations_1 (gimple_stmt_iterator *gsi,
2015 bitmap dce_ssa_names)
2017 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
2018 enum tree_code code;
2019 optab op = unknown_optab;
2020 enum gimple_rhs_class rhs_class;
2021 tree new_rhs;
2023 /* Only consider code == GIMPLE_ASSIGN. */
2024 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
2025 if (!stmt)
2027 if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
2028 expand_vector_conversion (gsi);
2029 return;
2032 code = gimple_assign_rhs_code (stmt);
2033 rhs_class = get_gimple_rhs_class (code);
2034 lhs = gimple_assign_lhs (stmt);
2036 if (code == VEC_PERM_EXPR)
2038 lower_vec_perm (gsi);
2039 return;
2042 if (code == VEC_COND_EXPR)
2044 expand_vector_condition (gsi, dce_ssa_names);
2045 return;
2048 if (code == COND_EXPR
2049 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2050 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2052 expand_vector_scalar_condition (gsi);
2053 return;
2056 if (code == CONSTRUCTOR
2057 && TREE_CODE (lhs) == SSA_NAME
2058 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2059 && !gimple_clobber_p (stmt)
2060 && optimize)
2062 optimize_vector_constructor (gsi);
2063 return;
2066 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2067 return;
2069 rhs1 = gimple_assign_rhs1 (stmt);
2070 type = gimple_expr_type (stmt);
2071 if (rhs_class == GIMPLE_BINARY_RHS)
2072 rhs2 = gimple_assign_rhs2 (stmt);
2074 if (!VECTOR_TYPE_P (type)
2075 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2076 return;
2078 /* A scalar operation pretending to be a vector one. */
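/* I.e. the result (and, for a comparison, also the operands) is a
vector boolean whose mode is a scalar integer mode, so the statement
is really an integer bit operation and needs no lowering here. */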
2079 if (VECTOR_BOOLEAN_TYPE_P (type)
2080 && !VECTOR_MODE_P (TYPE_MODE (type))
2081 && TYPE_MODE (type) != BLKmode
2082 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2083 || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2084 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2085 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2086 return;
2088 /* If the vector operation operates on vectors whose elements are all
2089 the same, implement it as a scalar operation followed by a splat,
2090 provided the target supports the scalar operation. */
2091 tree srhs1, srhs2 = NULL_TREE;
2092 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2093 && (rhs2 == NULL_TREE
2094 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2095 && (srhs2 = rhs2))
2096 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2097 /* As we query direct optabs, restrict this to non-convert operations. */
2098 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2100 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2101 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2102 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2104 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
2105 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
2106 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
2107 gimple_assign_set_rhs_from_tree (gsi,
2108 build_vector_from_val (type, slhs));
2109 update_stmt (stmt);
2110 return;
2114 if (CONVERT_EXPR_CODE_P (code)
2115 || code == FLOAT_EXPR
2116 || code == FIX_TRUNC_EXPR
2117 || code == VIEW_CONVERT_EXPR)
2118 return;
2120 /* The signedness is determined from the input argument. */
2121 if (code == VEC_UNPACK_FLOAT_HI_EXPR
2122 || code == VEC_UNPACK_FLOAT_LO_EXPR
2123 || code == VEC_PACK_FLOAT_EXPR)
2125 /* We do not know how to scalarize those. */
2126 return;
2129 /* For widening/narrowing vector operations, the relevant type is that of
2130 the arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR is
2131 handled in the same way above. */
2132 if (code == WIDEN_SUM_EXPR
2133 || code == VEC_WIDEN_MULT_HI_EXPR
2134 || code == VEC_WIDEN_MULT_LO_EXPR
2135 || code == VEC_WIDEN_MULT_EVEN_EXPR
2136 || code == VEC_WIDEN_MULT_ODD_EXPR
2137 || code == VEC_UNPACK_HI_EXPR
2138 || code == VEC_UNPACK_LO_EXPR
2139 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2140 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2141 || code == VEC_PACK_TRUNC_EXPR
2142 || code == VEC_PACK_SAT_EXPR
2143 || code == VEC_PACK_FIX_TRUNC_EXPR
2144 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2145 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2147 /* We do not know how to scalarize those. */
2148 return;
2151 /* Choose between vector shift/rotate by vector and vector shift/rotate by
2152 scalar. */
2153 if (code == LSHIFT_EXPR
2154 || code == RSHIFT_EXPR
2155 || code == LROTATE_EXPR
2156 || code == RROTATE_EXPR)
2158 optab opv;
2160 /* Check whether we have vector <op> {x,x,x,x} where x
2161 could be a scalar variable or a constant. Transform
2162 vector <op> {x,x,x,x} ==> vector <op> scalar. */
2163 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2165 tree first;
2167 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2169 gimple_assign_set_rhs2 (stmt, first);
2170 update_stmt (stmt);
2171 rhs2 = first;
2175 opv = optab_for_tree_code (code, type, optab_vector);
2176 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2177 op = opv;
2178 else
2180 op = optab_for_tree_code (code, type, optab_scalar);
2182 compute_type = get_compute_type (code, op, type);
2183 if (compute_type == type)
2184 return;
2185 /* The rtl expander will expand vector/scalar as vector/vector
2186 if necessary. Pick the one with the wider vector type. */
2187 tree compute_vtype = get_compute_type (code, opv, type);
2188 if (subparts_gt (compute_vtype, compute_type))
2190 compute_type = compute_vtype;
2191 op = opv;
2195 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2197 if (compute_type == NULL_TREE)
2198 compute_type = get_compute_type (code, op, type);
2199 if (compute_type == type)
2200 return;
2201 /* Before splitting vector rotates into scalar rotates,
2202 see whether vector shifts plus a BIT_IOR_EXPR can be used
2203 instead. For vector-by-vector rotates we'd also
2204 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
2205 for now, as fold doesn't seem to create such rotates anyway. */
2206 if (compute_type == TREE_TYPE (type)
2207 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2209 optab oplv = vashl_optab, opl = ashl_optab;
2210 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2211 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2212 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2213 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2214 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2215 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2216 /* The rtl expander will expand vector/scalar as vector/vector
2217 if necessary. Pick the one with the wider vector type. */
2218 if (subparts_gt (compute_lvtype, compute_ltype))
2220 compute_ltype = compute_lvtype;
2221 opl = oplv;
2223 if (subparts_gt (compute_rvtype, compute_rtype))
2225 compute_rtype = compute_rvtype;
2226 opr = oprv;
2228 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2229 BIT_IOR_EXPR. */
2230 compute_type = compute_ltype;
2231 if (subparts_gt (compute_type, compute_rtype))
2232 compute_type = compute_rtype;
2233 if (subparts_gt (compute_type, compute_otype))
2234 compute_type = compute_otype;
2235 /* Verify all 3 operations can be performed in that type. */
2236 if (compute_type != TREE_TYPE (type))
2238 if (optab_handler (opl, TYPE_MODE (compute_type))
2239 == CODE_FOR_nothing
2240 || optab_handler (opr, TYPE_MODE (compute_type))
2241 == CODE_FOR_nothing
2242 || optab_handler (opo, TYPE_MODE (compute_type))
2243 == CODE_FOR_nothing)
2244 compute_type = TREE_TYPE (type);
2249 else
2250 op = optab_for_tree_code (code, type, optab_default);
2252 /* Optabs will try converting a negation into a subtraction, so
2253 look for it as well. TODO: negation of floating-point vectors
2254 might be turned into an exclusive OR toggling the sign bit. */
2255 if (op == unknown_optab
2256 && code == NEGATE_EXPR
2257 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2258 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2260 if (compute_type == NULL_TREE)
2261 compute_type = get_compute_type (code, op, type);
2262 if (compute_type == type)
2263 return;
2265 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
2266 dce_ssa_names);
2268 /* Leave the expression untouched for later expansion. */
2269 if (new_rhs == NULL_TREE)
2270 return;
2272 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2273 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2274 new_rhs);
2276 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2277 way to do it is to change expand_vector_operation and its callees to
2278 return a tree_code, RHS1 and RHS2 instead of a tree. */
2279 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2280 update_stmt (gsi_stmt (*gsi));
2283 /* Use this to lower vector operations introduced by the vectorizer,
2284 if it may need the bit-twiddling tricks implemented in this file. */
2286 static unsigned int
2287 expand_vector_operations (void)
2289 gimple_stmt_iterator gsi;
2290 basic_block bb;
2291 bool cfg_changed = false;
2293 auto_bitmap dce_ssa_names;
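/* SSA names whose definitions become dead during lowering are
collected here and removed after the walk below. */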
2295 FOR_EACH_BB_FN (bb, cfun)
2297 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2299 expand_vector_operations_1 (&gsi, dce_ssa_names);
2300 /* ??? If we do not clean up EH here we will ICE in
2301 verification. But in reality we have created wrong code,
2302 as we did not properly transition the EH info and edges to
2303 the piecewise computations. */
2304 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2305 && gimple_purge_dead_eh_edges (bb))
2306 cfg_changed = true;
2310 simple_dce_from_worklist (dce_ssa_names);
2312 return cfg_changed ? TODO_cleanup_cfg : 0;
2315 namespace {
2317 const pass_data pass_data_lower_vector =
2319 GIMPLE_PASS, /* type */
2320 "veclower", /* name */
2321 OPTGROUP_VEC, /* optinfo_flags */
2322 TV_NONE, /* tv_id */
2323 PROP_cfg, /* properties_required */
2324 PROP_gimple_lvec, /* properties_provided */
2325 0, /* properties_destroyed */
2326 0, /* todo_flags_start */
2327 TODO_update_ssa, /* todo_flags_finish */
2330 class pass_lower_vector : public gimple_opt_pass
2332 public:
2333 pass_lower_vector (gcc::context *ctxt)
2334 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2337 /* opt_pass methods: */
2338 virtual bool gate (function *fun)
2340 return !(fun->curr_properties & PROP_gimple_lvec);
2343 virtual unsigned int execute (function *)
2345 return expand_vector_operations ();
2348 }; // class pass_lower_vector
2350 } // anon namespace
2352 gimple_opt_pass *
2353 make_pass_lower_vector (gcc::context *ctxt)
2355 return new pass_lower_vector (ctxt);
2358 namespace {
2360 const pass_data pass_data_lower_vector_ssa =
2362 GIMPLE_PASS, /* type */
2363 "veclower2", /* name */
2364 OPTGROUP_VEC, /* optinfo_flags */
2365 TV_NONE, /* tv_id */
2366 PROP_cfg, /* properties_required */
2367 PROP_gimple_lvec, /* properties_provided */
2368 0, /* properties_destroyed */
2369 0, /* todo_flags_start */
2370 ( TODO_update_ssa
2371 | TODO_cleanup_cfg ), /* todo_flags_finish */
2374 class pass_lower_vector_ssa : public gimple_opt_pass
2376 public:
2377 pass_lower_vector_ssa (gcc::context *ctxt)
2378 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2381 /* opt_pass methods: */
2382 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2383 virtual unsigned int execute (function *)
2385 return expand_vector_operations ();
2388 }; // class pass_lower_vector_ssa
2390 } // anon namespace
2392 gimple_opt_pass *
2393 make_pass_lower_vector_ssa (gcc::context *ctxt)
2395 return new pass_lower_vector_ssa (ctxt);
2398 #include "gt-tree-vect-generic.h"