[official-gcc.git] / gcc / tree-vect-generic.c
blob d7bafa77134079faf743c1b482251311abb681c5
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "recog.h" /* FIXME: for insn_data */
47 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
49 /* Return the number of elements in a vector type TYPE that we have
50 already decided needs to be expanded piecewise. We don't support
51 this kind of expansion for variable-length vectors, since we should
52 always check for target support before introducing uses of those. */
53 static unsigned int
54 nunits_for_known_piecewise_op (const_tree type)
56 return TYPE_VECTOR_SUBPARTS (type).to_constant ();
59 /* Return true if TYPE1 has more elements than TYPE2, where either
60 type may be a vector or a scalar. */
62 static inline bool
63 subparts_gt (tree type1, tree type2)
65 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
66 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
67 return known_gt (n1, n2);
70 /* Build a constant of type TYPE, made of VALUE's bits replicated
71 every WIDTH bits to fit TYPE's precision. */
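/* For example, with a 32-bit integer TYPE, WIDTH of 8 and VALUE 0x7f,
   the constant built here is 0x7f7f7f7f.  */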
72 static tree
73 build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
75 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76 / HOST_BITS_PER_WIDE_INT;
77 unsigned HOST_WIDE_INT low, mask;
78 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79 int i;
81 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
83 if (width == HOST_BITS_PER_WIDE_INT)
84 low = value;
85 else
87 mask = ((HOST_WIDE_INT)1 << width) - 1;
88 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
91 for (i = 0; i < n; i++)
92 a[i] = low;
94 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95 return wide_int_to_tree
96 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
103 /* Return a suitable vector type made of NUNITS units each of mode
104 "word_mode" (the global variable). */
105 static tree
106 build_word_mode_vector_type (int nunits)
108 if (!vector_inner_type)
109 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110 else if (vector_last_nunits == nunits)
112 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113 return vector_last_type;
116 vector_last_nunits = nunits;
117 vector_last_type = build_vector_type (vector_inner_type, nunits);
118 return vector_last_type;
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 tree, tree, tree, tree, tree, enum tree_code,
123 tree);
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 tree t, tree bitsize, tree bitpos)
129 if (TREE_CODE (t) == SSA_NAME)
131 gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132 if (is_gimple_assign (def_stmt)
133 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 || (bitpos
135 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 t = gimple_assign_rhs1 (def_stmt);
138 if (bitpos)
139 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
140 else
141 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
144 static tree
145 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
146 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
147 enum tree_code code, tree type ATTRIBUTE_UNUSED)
149 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
150 return gimplify_build1 (gsi, code, inner_type, a);
153 static tree
154 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
155 tree bitpos, tree bitsize, enum tree_code code,
156 tree type ATTRIBUTE_UNUSED)
158 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
159 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
160 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
161 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
162 return gimplify_build2 (gsi, code, inner_type, a, b);
165 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
167 INNER_TYPE is the type of the A and B elements.
169 The returned expression is of signed integer type with the
170 size equal to the size of INNER_TYPE. */
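/* For instance, for a V4SI comparison each element pair is compared with
   a scalar comparison whose result is all-ones (-1) when the comparison
   holds and 0 otherwise.  */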
171 static tree
172 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
173 tree bitpos, tree bitsize, enum tree_code code, tree type)
175 tree stype = TREE_TYPE (type);
176 tree cst_false = build_zero_cst (stype);
177 tree cst_true = build_all_ones_cst (stype);
178 tree cmp;
180 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
181 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
183 cmp = build2 (code, boolean_type_node, a, b);
184 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
187 /* Expand vector addition to scalars. This does bit twiddling
188 in order to increase parallelism:
190 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
191 (a ^ b) & 0x80808080
193 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
194 (a ^ ~b) & 0x80808080
196 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
198 This optimization should be done only if four or more vector items
199 fit into a word. */
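/* As a concrete example, adding two V4QI vectors held in a 32-bit word
   uses low_bits = 0x7f7f7f7f and high_bits = 0x80808080: clearing the
   top bit of every byte before the word-wide addition keeps carries from
   crossing element boundaries, and the final XOR restores the correct
   top bit of each byte of the sum.  */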
200 static tree
201 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
202 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
203 enum tree_code code, tree type ATTRIBUTE_UNUSED)
205 unsigned int width = vector_element_bits (TREE_TYPE (a));
206 tree inner_type = TREE_TYPE (TREE_TYPE (a));
207 unsigned HOST_WIDE_INT max;
208 tree low_bits, high_bits, a_low, b_low, result_low, signs;
210 max = GET_MODE_MASK (TYPE_MODE (inner_type));
211 low_bits = build_replicated_const (word_type, width, max >> 1);
212 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
214 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
215 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
217 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
218 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
219 if (code == PLUS_EXPR)
220 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
221 else
223 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
224 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
227 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
228 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
229 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
232 static tree
233 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
234 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
235 tree bitsize ATTRIBUTE_UNUSED,
236 enum tree_code code ATTRIBUTE_UNUSED,
237 tree type ATTRIBUTE_UNUSED)
239 unsigned int width = vector_element_bits (TREE_TYPE (b));
240 tree inner_type = TREE_TYPE (TREE_TYPE (b));
241 HOST_WIDE_INT max;
242 tree low_bits, high_bits, b_low, result_low, signs;
244 max = GET_MODE_MASK (TYPE_MODE (inner_type));
245 low_bits = build_replicated_const (word_type, width, max >> 1);
246 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
248 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
250 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
251 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
252 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
253 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
254 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
257 /* Expand a vector operation to scalars, by using many operations
258 whose type is the vector type's inner type. */
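/* For example, a V4SI operation with no hardware support becomes four
   SImode operations whose results are collected into a CONSTRUCTOR of
   the original vector type.  */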
259 static tree
260 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
261 tree type, tree inner_type,
262 tree a, tree b, enum tree_code code,
263 tree ret_type = NULL_TREE)
265 vec<constructor_elt, va_gc> *v;
266 tree part_width = TYPE_SIZE (inner_type);
267 tree index = bitsize_int (0);
268 int nunits = nunits_for_known_piecewise_op (type);
269 int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
270 int i;
271 location_t loc = gimple_location (gsi_stmt (*gsi));
273 if (ret_type
274 || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
275 warning_at (loc, OPT_Wvector_operation_performance,
276 "vector operation will be expanded piecewise");
277 else
278 warning_at (loc, OPT_Wvector_operation_performance,
279 "vector operation will be expanded in parallel");
281 if (!ret_type)
282 ret_type = type;
283 vec_alloc (v, (nunits + delta - 1) / delta);
284 for (i = 0; i < nunits;
285 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
287 tree result = f (gsi, inner_type, a, b, index, part_width, code,
288 ret_type);
289 constructor_elt ce = {NULL_TREE, result};
290 v->quick_push (ce);
293 return build_constructor (ret_type, v);
296 /* Expand a vector operation to scalars with the freedom to use
297 a scalar integer type, or to use a different size for the items
298 in the vector type. */
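/* For example, a bitwise AND of two V16QI vectors on a 64-bit target can
   be done as two word-mode ANDs, since bitwise operations do not care
   about element boundaries.  */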
299 static tree
300 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
301 tree a, tree b, enum tree_code code)
303 tree result, compute_type;
304 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
305 location_t loc = gimple_location (gsi_stmt (*gsi));
307 /* We have three strategies. If the type is already correct, just do
308 the operation an element at a time. Else, if the vector is wider than
309 one word, do it a word at a time; finally, if the vector is smaller
310 than one word, do it as a scalar. */
311 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
312 return expand_vector_piecewise (gsi, f,
313 type, TREE_TYPE (type),
314 a, b, code);
315 else if (n_words > 1)
317 tree word_type = build_word_mode_vector_type (n_words);
318 result = expand_vector_piecewise (gsi, f,
319 word_type, TREE_TYPE (word_type),
320 a, b, code);
321 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
322 GSI_SAME_STMT);
324 else
326 /* Use a single scalar operation with a mode no wider than word_mode. */
327 scalar_int_mode mode
328 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
329 compute_type = lang_hooks.types.type_for_mode (mode, 1);
330 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
331 warning_at (loc, OPT_Wvector_operation_performance,
332 "vector operation will be expanded with a "
333 "single scalar operation");
336 return result;
339 /* Expand a vector operation to scalars; for integer types we can use
340 special bit twiddling tricks to do the sums a word at a time, using
341 function F_PARALLEL instead of F. These tricks are done only if
342 they can process at least four items, that is, only if the vector
343 holds at least four items and if a word can hold four items. */
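/* For example, on a 64-bit target V8QI addition qualifies for the
   parallel path (eight items per word), whereas V2SI addition does not
   (only two items) and is expanded piecewise instead.  */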
344 static tree
345 expand_vector_addition (gimple_stmt_iterator *gsi,
346 elem_op_func f, elem_op_func f_parallel,
347 tree type, tree a, tree b, enum tree_code code)
349 int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
351 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
352 && parts_per_word >= 4
353 && nunits_for_known_piecewise_op (type) >= 4)
354 return expand_vector_parallel (gsi, f_parallel,
355 type, a, b, code);
356 else
357 return expand_vector_piecewise (gsi, f,
358 type, TREE_TYPE (type),
359 a, b, code);
362 static bool
363 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
365 /* Try to expand the vector comparison expression OP0 CODE OP1 by
366 querying the optab to see whether the following expression:
367 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
368 can be expanded. */
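/* When the target cannot expand it, the comparison is lowered below
   either to per-element comparisons yielding -1/0 results, or, for
   vector boolean types carried in a scalar integer mode, to single-bit
   results packed into an integer bitmask.  */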
369 static tree
370 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
371 tree op1, enum tree_code code,
372 bitmap dce_ssa_names)
374 tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
375 use_operand_p use_p;
376 imm_use_iterator iterator;
377 bool vec_cond_expr_only = true;
379 /* As seen in PR95830, we should not expand comparisons that are only
380 feeding a VEC_COND_EXPR statement. */
381 auto_vec<gimple *> uses;
382 FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
383 uses.safe_push (USE_STMT (use_p));
385 for (unsigned i = 0; i < uses.length (); i ++)
387 gassign *use = dyn_cast<gassign *> (uses[i]);
388 if (use != NULL
389 && gimple_assign_rhs_code (use) == VEC_COND_EXPR
390 && gimple_assign_rhs1 (use) == lhs)
392 gimple_stmt_iterator it = gsi_for_stmt (use);
393 if (!expand_vector_condition (&it, dce_ssa_names))
395 vec_cond_expr_only = false;
396 break;
399 else
401 vec_cond_expr_only = false;
402 break;
406 if (!uses.is_empty () && vec_cond_expr_only)
407 return NULL_TREE;
409 tree t;
410 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
412 if (VECTOR_BOOLEAN_TYPE_P (type)
413 && SCALAR_INT_MODE_P (TYPE_MODE (type))
414 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
415 TYPE_VECTOR_SUBPARTS (type)
416 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
417 (TREE_TYPE (type)))))
419 tree inner_type = TREE_TYPE (TREE_TYPE (op0));
420 tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
421 tree index = bitsize_int (0);
422 int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
423 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
424 tree ret_type = build_nonstandard_integer_type (prec, 1);
425 tree ret_inner_type = boolean_type_node;
426 int i;
427 location_t loc = gimple_location (gsi_stmt (*gsi));
428 t = build_zero_cst (ret_type);
430 if (TYPE_PRECISION (ret_inner_type) != 1)
431 ret_inner_type = build_nonstandard_integer_type (1, 1);
432 warning_at (loc, OPT_Wvector_operation_performance,
433 "vector operation will be expanded piecewise");
434 for (i = 0; i < nunits;
435 i++, index = int_const_binop (PLUS_EXPR, index, part_width))
437 tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
438 index);
439 tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
440 index);
441 tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
442 t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
443 bitsize_int (i));
445 t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
447 else
448 t = expand_vector_piecewise (gsi, do_compare, type,
449 TREE_TYPE (TREE_TYPE (op0)), op0, op1,
450 code);
452 else
453 t = NULL_TREE;
455 return t;
458 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
459 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
460 the result if successful, otherwise return NULL_TREE. */
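/* For example, with SHIFTCNTS of {2, 2, 2, 2} a uniform scalar shift
   optab is tried first; with differing counts only a per-element vector
   shift optab can be used.  */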
461 static tree
462 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
464 optab op;
465 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
466 bool scalar_shift = true;
468 for (i = 1; i < nunits; i++)
470 if (shiftcnts[i] != shiftcnts[0])
471 scalar_shift = false;
474 if (scalar_shift && shiftcnts[0] == 0)
475 return op0;
477 if (scalar_shift)
479 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
480 if (op != unknown_optab
481 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
482 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
483 build_int_cst (NULL_TREE, shiftcnts[0]));
486 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
487 if (op != unknown_optab
488 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
490 tree_vector_builder vec (type, nunits, 1);
491 for (i = 0; i < nunits; i++)
492 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
493 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
496 return NULL_TREE;
499 /* Try to expand integer vector division by constant using
500 widening multiply, shifts and additions. */
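/* For example, an unsigned V4SI division by {3, 3, 3, 3} can be expanded
   as a highpart multiply followed by a shift: q = (x h* 0xaaaaaaab) >> 1.  */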
501 static tree
502 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
503 tree op1, enum tree_code code)
505 bool use_pow2 = true;
506 bool has_vector_shift = true;
507 bool use_abs_op1 = false;
508 int mode = -1, this_mode;
509 int pre_shift = -1, post_shift;
510 unsigned int nunits = nunits_for_known_piecewise_op (type);
511 int *shifts = XALLOCAVEC (int, nunits * 4);
512 int *pre_shifts = shifts + nunits;
513 int *post_shifts = pre_shifts + nunits;
514 int *shift_temps = post_shifts + nunits;
515 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
516 int prec = TYPE_PRECISION (TREE_TYPE (type));
517 int dummy_int;
518 unsigned int i;
519 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
520 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
521 tree cur_op, mulcst, tem;
522 optab op;
524 if (prec > HOST_BITS_PER_WIDE_INT)
525 return NULL_TREE;
527 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
528 if (op == unknown_optab
529 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
530 has_vector_shift = false;
532 /* Analysis phase. Determine if all op1 elements are powers of two
533 and whether it is possible to expand the operation using shifts
534 (or, for remainder, using masking). Additionally compute the
535 multiplicative constants and pre and post shifts if the division is
536 to be expanded using widening or high part multiplication plus shifts. */
537 for (i = 0; i < nunits; i++)
539 tree cst = VECTOR_CST_ELT (op1, i);
540 unsigned HOST_WIDE_INT ml;
542 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
543 return NULL_TREE;
544 pre_shifts[i] = 0;
545 post_shifts[i] = 0;
546 mulc[i] = 0;
547 if (use_pow2
548 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
549 use_pow2 = false;
550 if (use_pow2)
552 shifts[i] = tree_log2 (cst);
553 if (shifts[i] != shifts[0]
554 && code == TRUNC_DIV_EXPR
555 && !has_vector_shift)
556 use_pow2 = false;
558 if (mode == -2)
559 continue;
560 if (sign_p == UNSIGNED)
562 unsigned HOST_WIDE_INT mh;
563 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
565 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
566 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
567 return NULL_TREE;
569 if (d <= 1)
571 mode = -2;
572 continue;
575 /* Find a suitable multiplier and right shift count
576 to use instead of dividing by D. */
577 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
579 /* If the suggested multiplier is more than PREC bits, we can
580 do better for even divisors, using an initial right shift. */
581 if ((mh != 0 && (d & 1) == 0)
582 || (!has_vector_shift && pre_shift != -1))
584 if (has_vector_shift)
585 pre_shift = ctz_or_zero (d);
586 else if (pre_shift == -1)
588 unsigned int j;
589 for (j = 0; j < nunits; j++)
591 tree cst2 = VECTOR_CST_ELT (op1, j);
592 unsigned HOST_WIDE_INT d2;
593 int this_pre_shift;
595 if (!tree_fits_uhwi_p (cst2))
596 return NULL_TREE;
597 d2 = tree_to_uhwi (cst2) & mask;
598 if (d2 == 0)
599 return NULL_TREE;
600 this_pre_shift = floor_log2 (d2 & -d2);
601 if (pre_shift == -1 || this_pre_shift < pre_shift)
602 pre_shift = this_pre_shift;
604 if (i != 0 && pre_shift != 0)
606 /* Restart. */
607 i = -1U;
608 mode = -1;
609 continue;
612 if (pre_shift != 0)
614 if ((d >> pre_shift) <= 1)
616 mode = -2;
617 continue;
619 mh = choose_multiplier (d >> pre_shift, prec,
620 prec - pre_shift,
621 &ml, &post_shift, &dummy_int);
622 gcc_assert (!mh);
623 pre_shifts[i] = pre_shift;
626 if (!mh)
627 this_mode = 0;
628 else
629 this_mode = 1;
631 else
633 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
634 unsigned HOST_WIDE_INT abs_d;
636 if (d == -1)
637 return NULL_TREE;
639 /* Since d might be INT_MIN, we have to cast to
640 unsigned HOST_WIDE_INT before negating to avoid
641 undefined signed overflow. */
642 abs_d = (d >= 0
643 ? (unsigned HOST_WIDE_INT) d
644 : - (unsigned HOST_WIDE_INT) d);
646 /* n rem d = n rem -d */
647 if (code == TRUNC_MOD_EXPR && d < 0)
649 d = abs_d;
650 use_abs_op1 = true;
652 if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
654 /* This case is not handled correctly below. */
655 mode = -2;
656 continue;
658 if (abs_d <= 1)
660 mode = -2;
661 continue;
664 choose_multiplier (abs_d, prec, prec - 1, &ml,
665 &post_shift, &dummy_int);
666 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
668 this_mode = 4 + (d < 0);
669 ml |= HOST_WIDE_INT_M1U << (prec - 1);
671 else
672 this_mode = 2 + (d < 0);
674 mulc[i] = ml;
675 post_shifts[i] = post_shift;
676 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
677 || post_shift >= prec
678 || pre_shifts[i] >= prec)
679 this_mode = -2;
681 if (i == 0)
682 mode = this_mode;
683 else if (mode != this_mode)
684 mode = -2;
687 if (use_pow2)
689 tree addend = NULL_TREE;
690 if (sign_p == SIGNED)
692 tree uns_type;
694 /* Both division and remainder sequences need
695 op0 < 0 ? mask : 0 computed. It can be either computed as
696 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
697 if none of the shifts is 0, or as the conditional. */
698 for (i = 0; i < nunits; i++)
699 if (shifts[i] == 0)
700 break;
701 uns_type
702 = build_vector_type (build_nonstandard_integer_type (prec, 1),
703 nunits);
704 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
706 for (i = 0; i < nunits; i++)
707 shift_temps[i] = prec - 1;
708 cur_op = add_rshift (gsi, type, op0, shift_temps);
709 if (cur_op != NULL_TREE)
711 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
712 uns_type, cur_op);
713 for (i = 0; i < nunits; i++)
714 shift_temps[i] = prec - shifts[i];
715 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
716 if (cur_op != NULL_TREE)
717 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
718 type, cur_op);
721 if (addend == NULL_TREE
722 && expand_vec_cond_expr_p (type, type, LT_EXPR))
724 tree zero, cst, mask_type, mask;
725 gimple *stmt, *cond;
727 mask_type = truth_type_for (type);
728 zero = build_zero_cst (type);
729 mask = make_ssa_name (mask_type);
730 cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
731 gsi_insert_before (gsi, cond, GSI_SAME_STMT);
732 tree_vector_builder vec (type, nunits, 1);
733 for (i = 0; i < nunits; i++)
734 vec.quick_push (build_int_cst (TREE_TYPE (type),
735 (HOST_WIDE_INT_1U
736 << shifts[i]) - 1));
737 cst = vec.build ();
738 addend = make_ssa_name (type);
739 stmt
740 = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
741 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
744 if (code == TRUNC_DIV_EXPR)
746 if (sign_p == UNSIGNED)
748 /* q = op0 >> shift; */
749 cur_op = add_rshift (gsi, type, op0, shifts);
750 if (cur_op != NULL_TREE)
751 return cur_op;
753 else if (addend != NULL_TREE)
755 /* t1 = op0 + addend;
756 q = t1 >> shift; */
757 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
758 if (op != unknown_optab
759 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
761 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
762 cur_op = add_rshift (gsi, type, cur_op, shifts);
763 if (cur_op != NULL_TREE)
764 return cur_op;
768 else
770 tree mask;
771 tree_vector_builder vec (type, nunits, 1);
772 for (i = 0; i < nunits; i++)
773 vec.quick_push (build_int_cst (TREE_TYPE (type),
774 (HOST_WIDE_INT_1U
775 << shifts[i]) - 1));
776 mask = vec.build ();
777 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
778 if (op != unknown_optab
779 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
781 if (sign_p == UNSIGNED)
782 /* r = op0 & mask; */
783 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
784 else if (addend != NULL_TREE)
786 /* t1 = op0 + addend;
787 t2 = t1 & mask;
788 r = t2 - addend; */
789 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
790 if (op != unknown_optab
791 && optab_handler (op, TYPE_MODE (type))
792 != CODE_FOR_nothing)
794 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
795 addend);
796 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
797 cur_op, mask);
798 op = optab_for_tree_code (MINUS_EXPR, type,
799 optab_default);
800 if (op != unknown_optab
801 && optab_handler (op, TYPE_MODE (type))
802 != CODE_FOR_nothing)
803 return gimplify_build2 (gsi, MINUS_EXPR, type,
804 cur_op, addend);
811 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
812 return NULL_TREE;
814 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
815 return NULL_TREE;
817 cur_op = op0;
819 switch (mode)
821 case 0:
822 gcc_assert (sign_p == UNSIGNED);
823 /* t1 = oprnd0 >> pre_shift;
824 t2 = t1 h* ml;
825 q = t2 >> post_shift; */
826 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
827 if (cur_op == NULL_TREE)
828 return NULL_TREE;
829 break;
830 case 1:
831 gcc_assert (sign_p == UNSIGNED);
832 for (i = 0; i < nunits; i++)
834 shift_temps[i] = 1;
835 post_shifts[i]--;
837 break;
838 case 2:
839 case 3:
840 case 4:
841 case 5:
842 gcc_assert (sign_p == SIGNED);
843 for (i = 0; i < nunits; i++)
844 shift_temps[i] = prec - 1;
845 break;
846 default:
847 return NULL_TREE;
850 tree_vector_builder vec (type, nunits, 1);
851 for (i = 0; i < nunits; i++)
852 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
853 mulcst = vec.build ();
855 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
857 switch (mode)
859 case 0:
860 /* t1 = oprnd0 >> pre_shift;
861 t2 = t1 h* ml;
862 q = t2 >> post_shift; */
863 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
864 break;
865 case 1:
866 /* t1 = oprnd0 h* ml;
867 t2 = oprnd0 - t1;
868 t3 = t2 >> 1;
869 t4 = t1 + t3;
870 q = t4 >> (post_shift - 1); */
871 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
872 if (op == unknown_optab
873 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
874 return NULL_TREE;
875 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
876 tem = add_rshift (gsi, type, tem, shift_temps);
877 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
878 if (op == unknown_optab
879 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
880 return NULL_TREE;
881 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
882 cur_op = add_rshift (gsi, type, tem, post_shifts);
883 if (cur_op == NULL_TREE)
884 return NULL_TREE;
885 break;
886 case 2:
887 case 3:
888 case 4:
889 case 5:
890 /* t1 = oprnd0 h* ml;
891 t2 = t1; [ iff (mode & 2) != 0 ]
892 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
893 t3 = t2 >> post_shift;
894 t4 = oprnd0 >> (prec - 1);
895 q = t3 - t4; [ iff (mode & 1) == 0 ]
896 q = t4 - t3; [ iff (mode & 1) != 0 ] */
897 if ((mode & 2) == 0)
899 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
900 if (op == unknown_optab
901 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
902 return NULL_TREE;
903 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
905 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
906 if (cur_op == NULL_TREE)
907 return NULL_TREE;
908 tem = add_rshift (gsi, type, op0, shift_temps);
909 if (tem == NULL_TREE)
910 return NULL_TREE;
911 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
912 if (op == unknown_optab
913 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
914 return NULL_TREE;
915 if ((mode & 1) == 0)
916 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
917 else
918 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
919 break;
920 default:
921 gcc_unreachable ();
924 if (code == TRUNC_DIV_EXPR)
925 return cur_op;
927 /* We divided. Now finish by:
928 t1 = q * oprnd1;
929 r = oprnd0 - t1; */
930 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
931 if (op == unknown_optab
932 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
933 return NULL_TREE;
934 if (use_abs_op1)
936 tree_vector_builder elts;
937 if (!elts.new_unary_operation (type, op1, false))
938 return NULL_TREE;
939 unsigned int count = elts.encoded_nelts ();
940 for (unsigned int i = 0; i < count; ++i)
942 tree elem1 = VECTOR_CST_ELT (op1, i);
944 tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
945 if (elt == NULL_TREE)
946 return NULL_TREE;
947 elts.quick_push (elt);
949 op1 = elts.build ();
951 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
952 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
953 if (op == unknown_optab
954 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
955 return NULL_TREE;
956 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
959 /* Expand a vector condition to scalars, by using many conditions
960 on the vector's elements. */
962 static bool
963 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
965 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
966 tree type = gimple_expr_type (stmt);
967 tree a = gimple_assign_rhs1 (stmt);
968 tree a1 = a;
969 tree a2 = NULL_TREE;
970 bool a_is_comparison = false;
971 bool a_is_scalar_bitmask = false;
972 tree b = gimple_assign_rhs2 (stmt);
973 tree c = gimple_assign_rhs3 (stmt);
974 vec<constructor_elt, va_gc> *v;
975 tree constr;
976 tree inner_type = TREE_TYPE (type);
977 tree width = vector_element_bits_tree (type);
978 tree cond_type = TREE_TYPE (TREE_TYPE (a));
979 tree comp_inner_type = cond_type;
980 tree index = bitsize_int (0);
981 tree comp_width = width;
982 tree comp_index = index;
983 location_t loc = gimple_location (gsi_stmt (*gsi));
984 tree_code code = TREE_CODE (a);
985 gassign *assign = NULL;
987 if (code == SSA_NAME)
989 assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
990 if (assign != NULL
991 && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
993 a_is_comparison = true;
994 a1 = gimple_assign_rhs1 (assign);
995 a2 = gimple_assign_rhs2 (assign);
996 code = gimple_assign_rhs_code (assign);
997 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
998 comp_width = vector_element_bits_tree (TREE_TYPE (a1));
1002 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
1004 gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
1005 return true;
1008 /* Handle vector boolean types with bitmasks. If there is a comparison
1009 and we can expand the comparison into the vector boolean bitmask,
1010 or otherwise if it is compatible with type, we can transform
1011 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
1012 into
1013 tmp_6 = x_2 < y_3;
1014 tmp_7 = tmp_6 & vbfld_4;
1015 tmp_8 = ~tmp_6;
1016 tmp_9 = tmp_8 & vbfld_5;
1017 vbfld_1 = tmp_7 | tmp_9;
1018 Similarly for vbfld_10 instead of x_2 < y_3. */
1019 if (VECTOR_BOOLEAN_TYPE_P (type)
1020 && SCALAR_INT_MODE_P (TYPE_MODE (type))
1021 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
1022 TYPE_VECTOR_SUBPARTS (type)
1023 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
1024 && (a_is_comparison
1025 ? useless_type_conversion_p (type, TREE_TYPE (a))
1026 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1028 if (a_is_comparison)
1029 a = gimplify_build2 (gsi, code, type, a1, a2);
1030 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1031 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1032 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1033 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1034 gimple_assign_set_rhs_from_tree (gsi, a);
1035 update_stmt (gsi_stmt (*gsi));
1036 return true;
1039 /* TODO: try to find a smaller vector type. */
1041 warning_at (loc, OPT_Wvector_operation_performance,
1042 "vector condition will be expanded piecewise");
1044 if (!a_is_comparison
1045 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1046 && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1047 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1048 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1049 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1050 (TREE_TYPE (TREE_TYPE (a))))))
1052 a_is_scalar_bitmask = true;
1053 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1054 tree atype = build_nonstandard_integer_type (prec, 1);
1055 a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1058 int nunits = nunits_for_known_piecewise_op (type);
1059 vec_alloc (v, nunits);
1060 for (int i = 0; i < nunits; i++)
1062 tree aa, result;
1063 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1064 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1065 if (a_is_comparison)
1067 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1068 comp_width, comp_index);
1069 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1070 comp_width, comp_index);
1071 aa = fold_build2 (code, cond_type, aa1, aa2);
1073 else if (a_is_scalar_bitmask)
1075 wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1076 result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1077 a, wide_int_to_tree (TREE_TYPE (a), w));
1078 aa = fold_build2 (NE_EXPR, boolean_type_node, result,
1079 build_zero_cst (TREE_TYPE (a)));
1081 else
1082 aa = tree_vec_extract (gsi, cond_type, a, width, index);
1083 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1084 constructor_elt ce = {NULL_TREE, result};
1085 v->quick_push (ce);
1086 index = int_const_binop (PLUS_EXPR, index, width);
1087 if (width == comp_width)
1088 comp_index = index;
1089 else
1090 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1093 constr = build_constructor (type, v);
1094 gimple_assign_set_rhs_from_tree (gsi, constr);
1095 update_stmt (gsi_stmt (*gsi));
1097 if (a_is_comparison)
1098 bitmap_set_bit (dce_ssa_names,
1099 SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1101 return false;
1104 static tree
1105 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1106 gassign *assign, enum tree_code code,
1107 bitmap dce_ssa_names)
1109 machine_mode compute_mode = TYPE_MODE (compute_type);
1111 /* If the compute mode is not a vector mode (hence we are not decomposing
1112 a BLKmode vector to smaller, hardware-supported vectors), we may want
1113 to expand the operations in parallel. */
1114 if (!VECTOR_MODE_P (compute_mode))
1115 switch (code)
1117 case PLUS_EXPR:
1118 case MINUS_EXPR:
1119 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1120 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1121 gimple_assign_rhs1 (assign),
1122 gimple_assign_rhs2 (assign), code);
1123 break;
1125 case NEGATE_EXPR:
1126 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1127 return expand_vector_addition (gsi, do_unop, do_negate, type,
1128 gimple_assign_rhs1 (assign),
1129 NULL_TREE, code);
1130 break;
1132 case BIT_AND_EXPR:
1133 case BIT_IOR_EXPR:
1134 case BIT_XOR_EXPR:
1135 return expand_vector_parallel (gsi, do_binop, type,
1136 gimple_assign_rhs1 (assign),
1137 gimple_assign_rhs2 (assign), code);
1139 case BIT_NOT_EXPR:
1140 return expand_vector_parallel (gsi, do_unop, type,
1141 gimple_assign_rhs1 (assign),
1142 NULL_TREE, code);
1143 case EQ_EXPR:
1144 case NE_EXPR:
1145 case GT_EXPR:
1146 case LT_EXPR:
1147 case GE_EXPR:
1148 case LE_EXPR:
1149 case UNEQ_EXPR:
1150 case UNGT_EXPR:
1151 case UNLT_EXPR:
1152 case UNGE_EXPR:
1153 case UNLE_EXPR:
1154 case LTGT_EXPR:
1155 case ORDERED_EXPR:
1156 case UNORDERED_EXPR:
1158 tree rhs1 = gimple_assign_rhs1 (assign);
1159 tree rhs2 = gimple_assign_rhs2 (assign);
1161 return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
1162 dce_ssa_names);
1165 case TRUNC_DIV_EXPR:
1166 case TRUNC_MOD_EXPR:
1168 tree rhs1 = gimple_assign_rhs1 (assign);
1169 tree rhs2 = gimple_assign_rhs2 (assign);
1170 tree ret;
1172 if (!optimize
1173 || !VECTOR_INTEGER_TYPE_P (type)
1174 || TREE_CODE (rhs2) != VECTOR_CST
1175 || !VECTOR_MODE_P (TYPE_MODE (type)))
1176 break;
1178 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1179 if (ret != NULL_TREE)
1180 return ret;
1181 break;
1184 default:
1185 break;
1188 if (TREE_CODE_CLASS (code) == tcc_unary)
1189 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1190 gimple_assign_rhs1 (assign),
1191 NULL_TREE, code);
1192 else
1193 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1194 gimple_assign_rhs1 (assign),
1195 gimple_assign_rhs2 (assign), code);
1198 /* Try to optimize
1199 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1200 style stmts into:
1201 _9 = { b_7, b_7, b_7, b_7 };
1202 a_5 = _9 + { 0, 3, 6, 9 };
1203 because a vector splat operation is usually more efficient
1204 than piecewise initialization of the vector. */
1206 static void
1207 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1209 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1210 tree lhs = gimple_assign_lhs (stmt);
1211 tree rhs = gimple_assign_rhs1 (stmt);
1212 tree type = TREE_TYPE (rhs);
1213 unsigned int i, j;
1214 unsigned HOST_WIDE_INT nelts;
1215 bool all_same = true;
1216 constructor_elt *elt;
1217 gimple *g;
1218 tree base = NULL_TREE;
1219 optab op;
1221 if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1222 || nelts <= 2
1223 || CONSTRUCTOR_NELTS (rhs) != nelts)
1224 return;
1225 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1226 if (op == unknown_optab
1227 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1228 return;
1229 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1230 if (TREE_CODE (elt->value) != SSA_NAME
1231 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1232 return;
1233 else
1235 tree this_base = elt->value;
1236 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1237 all_same = false;
1238 for (j = 0; j < nelts + 1; j++)
1240 g = SSA_NAME_DEF_STMT (this_base);
1241 if (is_gimple_assign (g)
1242 && gimple_assign_rhs_code (g) == PLUS_EXPR
1243 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1244 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1245 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1246 this_base = gimple_assign_rhs1 (g);
1247 else
1248 break;
1250 if (i == 0)
1251 base = this_base;
1252 else if (this_base != base)
1253 return;
1255 if (all_same)
1256 return;
1257 tree_vector_builder cst (type, nelts, 1);
1258 for (i = 0; i < nelts; i++)
1260 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1261 tree elt = build_zero_cst (TREE_TYPE (base));
1262 while (this_base != base)
1264 g = SSA_NAME_DEF_STMT (this_base);
1265 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1266 elt, gimple_assign_rhs2 (g));
1267 if (elt == NULL_TREE
1268 || TREE_CODE (elt) != INTEGER_CST
1269 || TREE_OVERFLOW (elt))
1270 return;
1271 this_base = gimple_assign_rhs1 (g);
1273 cst.quick_push (elt);
1275 for (i = 0; i < nelts; i++)
1276 CONSTRUCTOR_ELT (rhs, i)->value = base;
1277 g = gimple_build_assign (make_ssa_name (type), rhs);
1278 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1279 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1280 cst.build ());
1281 gsi_replace (gsi, g, false);
1284 /* Return a type for the widest vector mode whose components are of type
1285 TYPE, or NULL_TREE if none is found. */
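/* For example, for a 32-bit float element this returns the type of the
   widest float vector mode for which the target implements OP, such as
   V4SF or V8SF, or NULL_TREE if there is none.  */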
1287 static tree
1288 type_for_widest_vector_mode (tree type, optab op)
1290 machine_mode inner_mode = TYPE_MODE (type);
1291 machine_mode best_mode = VOIDmode, mode;
1292 poly_int64 best_nunits = 0;
1294 if (SCALAR_FLOAT_MODE_P (inner_mode))
1295 mode = MIN_MODE_VECTOR_FLOAT;
1296 else if (SCALAR_FRACT_MODE_P (inner_mode))
1297 mode = MIN_MODE_VECTOR_FRACT;
1298 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1299 mode = MIN_MODE_VECTOR_UFRACT;
1300 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1301 mode = MIN_MODE_VECTOR_ACCUM;
1302 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1303 mode = MIN_MODE_VECTOR_UACCUM;
1304 else if (inner_mode == BImode)
1305 mode = MIN_MODE_VECTOR_BOOL;
1306 else
1307 mode = MIN_MODE_VECTOR_INT;
1309 FOR_EACH_MODE_FROM (mode, mode)
1310 if (GET_MODE_INNER (mode) == inner_mode
1311 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1312 && optab_handler (op, mode) != CODE_FOR_nothing)
1313 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1315 if (best_mode == VOIDmode)
1316 return NULL_TREE;
1317 else
1318 return build_vector_type_for_mode (type, best_mode);
1322 /* Build a reference to the element of the vector VECT. The function
1323 returns either the element itself, a BIT_FIELD_REF, or an
1324 ARRAY_REF expression.
1326 GSI is required to insert temporary variables while building a
1327 reference to the element of the vector VECT.
1329 PTMPVEC is a pointer to the temporary variable for caching
1330 purposes. In case when PTMPVEC is NULL new temporary variable
1331 will be created. */
1332 static tree
1333 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1335 tree vect_type, vect_elt_type;
1336 gimple *asgn;
1337 tree tmpvec;
1338 tree arraytype;
1339 bool need_asgn = true;
1340 unsigned int elements;
1342 vect_type = TREE_TYPE (vect);
1343 vect_elt_type = TREE_TYPE (vect_type);
1344 elements = nunits_for_known_piecewise_op (vect_type);
1346 if (TREE_CODE (idx) == INTEGER_CST)
1348 unsigned HOST_WIDE_INT index;
1350 /* Given that we're about to compute a binary modulus,
1351 we don't care about the high bits of the value. */
1352 index = TREE_INT_CST_LOW (idx);
1353 if (!tree_fits_uhwi_p (idx) || index >= elements)
1355 index &= elements - 1;
1356 idx = build_int_cst (TREE_TYPE (idx), index);
1359 /* When lowering a vector statement sequence do some easy
1360 simplification by looking through intermediate vector results. */
1361 if (TREE_CODE (vect) == SSA_NAME)
1363 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1364 if (is_gimple_assign (def_stmt)
1365 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1366 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1367 vect = gimple_assign_rhs1 (def_stmt);
1370 if (TREE_CODE (vect) == VECTOR_CST)
1371 return VECTOR_CST_ELT (vect, index);
1372 else if (TREE_CODE (vect) == CONSTRUCTOR
1373 && (CONSTRUCTOR_NELTS (vect) == 0
1374 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1375 != VECTOR_TYPE))
1377 if (index < CONSTRUCTOR_NELTS (vect))
1378 return CONSTRUCTOR_ELT (vect, index)->value;
1379 return build_zero_cst (vect_elt_type);
1381 else
1383 tree size = vector_element_bits_tree (vect_type);
1384 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1385 size);
1386 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1390 if (!ptmpvec)
1391 tmpvec = create_tmp_var (vect_type, "vectmp");
1392 else if (!*ptmpvec)
1393 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1394 else
1396 tmpvec = *ptmpvec;
1397 need_asgn = false;
1400 if (need_asgn)
1402 TREE_ADDRESSABLE (tmpvec) = 1;
1403 asgn = gimple_build_assign (tmpvec, vect);
1404 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1407 arraytype = build_array_type_nelts (vect_elt_type, elements);
1408 return build4 (ARRAY_REF, vect_elt_type,
1409 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1410 idx, NULL_TREE, NULL_TREE);
1413 /* Check if VEC_PERM_EXPR within the given setting is supported
1414 by hardware, or lower it piecewise.
1416 When VEC_PERM_EXPR has the same first and second operands:
1417 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1418 {v0[mask[0]], v0[mask[1]], ...}
1419 MASK and V0 must have the same number of elements.
1421 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1422 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1423 V0 and V1 must have the same type. MASK, V0 and V1 must have the
1424 same number of elements. */
1426 static void
1427 lower_vec_perm (gimple_stmt_iterator *gsi)
1429 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1430 tree mask = gimple_assign_rhs3 (stmt);
1431 tree vec0 = gimple_assign_rhs1 (stmt);
1432 tree vec1 = gimple_assign_rhs2 (stmt);
1433 tree vect_type = TREE_TYPE (vec0);
1434 tree mask_type = TREE_TYPE (mask);
1435 tree vect_elt_type = TREE_TYPE (vect_type);
1436 tree mask_elt_type = TREE_TYPE (mask_type);
1437 unsigned HOST_WIDE_INT elements;
1438 vec<constructor_elt, va_gc> *v;
1439 tree constr, t, si, i_val;
1440 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1441 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1442 location_t loc = gimple_location (gsi_stmt (*gsi));
1443 unsigned i;
1445 if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1446 return;
1448 if (TREE_CODE (mask) == SSA_NAME)
1450 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1451 if (is_gimple_assign (def_stmt)
1452 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1453 mask = gimple_assign_rhs1 (def_stmt);
1456 vec_perm_builder sel_int;
1458 if (TREE_CODE (mask) == VECTOR_CST
1459 && tree_to_vec_perm_builder (&sel_int, mask))
1461 vec_perm_indices indices (sel_int, 2, elements);
1462 if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1464 gimple_assign_set_rhs3 (stmt, mask);
1465 update_stmt (stmt);
1466 return;
1468 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1469 vector as VEC1 and a right element shift MASK. */
1470 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1471 != CODE_FOR_nothing
1472 && TREE_CODE (vec1) == VECTOR_CST
1473 && initializer_zerop (vec1)
1474 && maybe_ne (indices[0], 0)
1475 && known_lt (poly_uint64 (indices[0]), elements))
1477 bool ok_p = indices.series_p (0, 1, indices[0], 1);
1478 if (!ok_p)
1480 for (i = 1; i < elements; ++i)
1482 poly_uint64 actual = indices[i];
1483 poly_uint64 expected = i + indices[0];
1484 /* Indices into the second vector are all equivalent. */
1485 if (maybe_lt (actual, elements)
1486 ? maybe_ne (actual, expected)
1487 : maybe_lt (expected, elements))
1488 break;
1490 ok_p = i == elements;
1492 if (ok_p)
1494 gimple_assign_set_rhs3 (stmt, mask);
1495 update_stmt (stmt);
1496 return;
1499 /* And similarly vec_shl pattern. */
1500 if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1501 != CODE_FOR_nothing
1502 && TREE_CODE (vec0) == VECTOR_CST
1503 && initializer_zerop (vec0))
1505 unsigned int first = 0;
1506 for (i = 0; i < elements; ++i)
1507 if (known_eq (poly_uint64 (indices[i]), elements))
1509 if (i == 0 || first)
1510 break;
1511 first = i;
1513 else if (first
1514 ? maybe_ne (poly_uint64 (indices[i]),
1515 elements + i - first)
1516 : maybe_ge (poly_uint64 (indices[i]), elements))
1517 break;
1518 if (i == elements)
1520 gimple_assign_set_rhs3 (stmt, mask);
1521 update_stmt (stmt);
1522 return;
1526 else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1527 return;
1529 warning_at (loc, OPT_Wvector_operation_performance,
1530 "vector shuffling operation will be expanded piecewise");
1532 vec_alloc (v, elements);
1533 for (i = 0; i < elements; i++)
1535 si = size_int (i);
1536 i_val = vector_element (gsi, mask, si, &masktmp);
1538 if (TREE_CODE (i_val) == INTEGER_CST)
1540 unsigned HOST_WIDE_INT index;
1542 index = TREE_INT_CST_LOW (i_val);
1543 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1544 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1546 if (two_operand_p && (index & elements) != 0)
1547 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1548 else
1549 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1551 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1552 true, GSI_SAME_STMT);
1554 else
1556 tree cond = NULL_TREE, v0_val;
1558 if (two_operand_p)
1560 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1561 build_int_cst (mask_elt_type, elements));
1562 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1563 true, GSI_SAME_STMT);
1566 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1567 build_int_cst (mask_elt_type, elements - 1));
1568 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1569 true, GSI_SAME_STMT);
1571 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1572 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1573 true, GSI_SAME_STMT);
1575 if (two_operand_p)
1577 tree v1_val;
1579 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1580 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1581 true, GSI_SAME_STMT);
1583 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1584 cond, build_zero_cst (mask_elt_type));
1585 cond = fold_build3 (COND_EXPR, vect_elt_type,
1586 cond, v0_val, v1_val);
1587 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1588 true, GSI_SAME_STMT);
1590 else
1591 t = v0_val;
1594 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1597 constr = build_constructor (vect_type, v);
1598 gimple_assign_set_rhs_from_tree (gsi, constr);
1599 update_stmt (gsi_stmt (*gsi));
1602 /* If OP is a uniform vector return the element it is a splat from. */
1604 static tree
1605 ssa_uniform_vector_p (tree op)
1607 if (TREE_CODE (op) == VECTOR_CST
1608 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1609 || TREE_CODE (op) == CONSTRUCTOR)
1610 return uniform_vector_p (op);
1611 if (TREE_CODE (op) == SSA_NAME)
1613 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1614 if (gimple_assign_single_p (def_stmt))
1615 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1617 return NULL_TREE;
1620 /* Return type in which CODE operation with optab OP can be
1621 computed. */
1623 static tree
1624 get_compute_type (enum tree_code code, optab op, tree type)
1626 /* For very wide vectors, try using a smaller vector mode. */
1627 tree compute_type = type;
1628 if (op
1629 && (!VECTOR_MODE_P (TYPE_MODE (type))
1630 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1632 tree vector_compute_type
1633 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1634 if (vector_compute_type != NULL_TREE
1635 && subparts_gt (compute_type, vector_compute_type)
1636 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1637 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1638 != CODE_FOR_nothing))
1639 compute_type = vector_compute_type;
1642 /* If we are breaking a BLKmode vector into smaller pieces,
1643 type_for_widest_vector_mode has already looked into the optab,
1644 so skip these checks. */
1645 if (compute_type == type)
1647 machine_mode compute_mode = TYPE_MODE (compute_type);
1648 if (VECTOR_MODE_P (compute_mode))
1650 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1651 return compute_type;
1652 if (code == MULT_HIGHPART_EXPR
1653 && can_mult_highpart_p (compute_mode,
1654 TYPE_UNSIGNED (compute_type)))
1655 return compute_type;
1657 /* There is no operation in hardware, so fall back to scalars. */
1658 compute_type = TREE_TYPE (type);
1661 return compute_type;
1664 static tree
1665 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1666 tree bitpos, tree bitsize, enum tree_code code,
1667 tree type ATTRIBUTE_UNUSED)
1669 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1670 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1671 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1672 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1673 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1674 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1677 /* Expand a vector COND_EXPR to scalars, piecewise. */
1678 static void
1679 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1681 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1682 tree type = gimple_expr_type (stmt);
1683 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1684 machine_mode compute_mode = TYPE_MODE (compute_type);
1685 gcc_assert (compute_mode != BLKmode);
1686 tree lhs = gimple_assign_lhs (stmt);
1687 tree rhs2 = gimple_assign_rhs2 (stmt);
1688 tree rhs3 = gimple_assign_rhs3 (stmt);
1689 tree new_rhs;
1691 /* If the compute mode is not a vector mode (hence we are not decomposing
1692 a BLKmode vector to smaller, hardware-supported vectors), we may want
1693 to expand the operations in parallel. */
1694 if (!VECTOR_MODE_P (compute_mode))
1695 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1696 COND_EXPR);
1697 else
1698 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1699 rhs2, rhs3, COND_EXPR);
1700 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1701 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1702 new_rhs);
1704 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1705 way to do it is to change expand_vector_operation and its callees to
1706 return a tree_code, RHS1 and RHS2 instead of a tree. */
1707 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1708 update_stmt (gsi_stmt (*gsi));
1711 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1712 lowering. If INNER_TYPE is not a vector type, this is a scalar
1713 fallback. */
1715 static tree
1716 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1717 tree decl, tree bitpos, tree bitsize,
1718 enum tree_code code, tree type)
1720 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1721 if (!VECTOR_TYPE_P (inner_type))
1722 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1723 if (code == CALL_EXPR)
1725 gimple *g = gimple_build_call (decl, 1, a);
1726 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1727 gimple_call_set_lhs (g, lhs);
1728 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1729 return lhs;
1731 else
1733 tree outer_type = build_vector_type (TREE_TYPE (type),
1734 TYPE_VECTOR_SUBPARTS (inner_type));
1735 return gimplify_build1 (gsi, code, outer_type, a);
1739 /* Similarly, but for narrowing conversion. */
1741 static tree
1742 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1743 tree, tree bitpos, tree, enum tree_code code,
1744 tree type)
1746 tree itype = build_vector_type (TREE_TYPE (inner_type),
1747 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1748 2));
1749 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1750 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1751 int_const_binop (PLUS_EXPR, bitpos,
1752 TYPE_SIZE (itype)));
1753 tree outer_type = build_vector_type (TREE_TYPE (type),
1754 TYPE_VECTOR_SUBPARTS (inner_type));
1755 return gimplify_build2 (gsi, code, outer_type, b, c);
1758 /* Expand VEC_CONVERT ifn call. */
1760 static void
1761 expand_vector_conversion (gimple_stmt_iterator *gsi)
1763 gimple *stmt = gsi_stmt (*gsi);
1764 gimple *g;
1765 tree lhs = gimple_call_lhs (stmt);
1766 if (lhs == NULL_TREE)
1768 g = gimple_build_nop ();
1769 gsi_replace (gsi, g, false);
1770 return;
1772 tree arg = gimple_call_arg (stmt, 0);
1773 tree ret_type = TREE_TYPE (lhs);
1774 tree arg_type = TREE_TYPE (arg);
1775 tree new_rhs, compute_type = TREE_TYPE (arg_type);
1776 enum tree_code code = NOP_EXPR;
1777 enum tree_code code1 = ERROR_MARK;
1778 enum { NARROW, NONE, WIDEN } modifier = NONE;
1779 optab optab1 = unknown_optab;
1781 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1782 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1783 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1784 code = FIX_TRUNC_EXPR;
1785 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1786 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1787 code = FLOAT_EXPR;
1788 unsigned int ret_elt_bits = vector_element_bits (ret_type);
1789 unsigned int arg_elt_bits = vector_element_bits (arg_type);
1790 if (ret_elt_bits < arg_elt_bits)
1791 modifier = NARROW;
1792 else if (ret_elt_bits > arg_elt_bits)
1793 modifier = WIDEN;
1795 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1797 if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1799 g = gimple_build_assign (lhs, code1, arg);
1800 gsi_replace (gsi, g, false);
1801 return;
1803 /* Can't use get_compute_type here, as supportable_convert_operation
1804 doesn't necessarily use an optab and needs two arguments. */
1805 tree vec_compute_type
1806 = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
1807 if (vec_compute_type
1808 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
1809 && subparts_gt (arg_type, vec_compute_type))
1811 unsigned HOST_WIDE_INT nelts
1812 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1813 while (nelts > 1)
1815 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1816 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1817 if (supportable_convert_operation (code, ret1_type, arg1_type,
1818 &code1))
1820 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1821 ret_type, arg1_type, arg,
1822 NULL_TREE, code1);
1823 g = gimple_build_assign (lhs, new_rhs);
1824 gsi_replace (gsi, g, false);
1825 return;
1827 nelts = nelts / 2;
1831 else if (modifier == NARROW)
1833 switch (code)
1835 CASE_CONVERT:
1836 code1 = VEC_PACK_TRUNC_EXPR;
1837 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1838 break;
1839 case FIX_TRUNC_EXPR:
1840 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1841 /* The signedness is determined from the output operand. */
1842 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1843 break;
1844 case FLOAT_EXPR:
1845 code1 = VEC_PACK_FLOAT_EXPR;
1846 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1847 break;
1848 default:
1849 gcc_unreachable ();
1852 if (optab1)
1853 compute_type = get_compute_type (code1, optab1, arg_type);
1854 enum insn_code icode1;
1855 if (VECTOR_TYPE_P (compute_type)
1856 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1857 != CODE_FOR_nothing)
1858 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1860 tree cretd_type
1861 = build_vector_type (TREE_TYPE (ret_type),
1862 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1863 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1865 if (compute_type == arg_type)
1867 new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1868 arg, build_zero_cst (arg_type));
1869 new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1870 TYPE_SIZE (ret_type),
1871 bitsize_int (0));
1872 g = gimple_build_assign (lhs, new_rhs);
1873 gsi_replace (gsi, g, false);
1874 return;
1876 tree dcompute_type
1877 = build_vector_type (TREE_TYPE (compute_type),
1878 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1879 if (TYPE_MAIN_VARIANT (dcompute_type)
1880 == TYPE_MAIN_VARIANT (arg_type))
1881 new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1882 NULL_TREE, bitsize_int (0),
1883 NULL_TREE, code1,
1884 ret_type);
1885 else
1886 new_rhs = expand_vector_piecewise (gsi,
1887 do_vec_narrow_conversion,
1888 arg_type, dcompute_type,
1889 arg, NULL_TREE, code1,
1890 ret_type);
1891 g = gimple_build_assign (lhs, new_rhs);
1892 gsi_replace (gsi, g, false);
1893 return;
1897 else if (modifier == WIDEN)
1899 enum tree_code code2 = ERROR_MARK;
1900 optab optab2 = unknown_optab;
1901 switch (code)
1903 CASE_CONVERT:
1904 code1 = VEC_UNPACK_LO_EXPR;
1905 code2 = VEC_UNPACK_HI_EXPR;
1906 break;
1907 case FIX_TRUNC_EXPR:
1908 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1909 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1910 break;
1911 case FLOAT_EXPR:
1912 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1913 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1914 break;
1915 default:
1916 gcc_unreachable ();
1918 if (BYTES_BIG_ENDIAN)
1919 std::swap (code1, code2);
1921 if (code == FIX_TRUNC_EXPR)
1923 /* The signedness is determined from the output operand. */
1924 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1925 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
1927 else
1929 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1930 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
1933 if (optab1 && optab2)
1934 compute_type = get_compute_type (code1, optab1, arg_type);
1936 enum insn_code icode1, icode2;
1937 if (VECTOR_TYPE_P (compute_type)
1938 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1939 != CODE_FOR_nothing)
1940 && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
1941 != CODE_FOR_nothing)
1942 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
1943 && (insn_data[icode1].operand[0].mode
1944 == insn_data[icode2].operand[0].mode))
1946 poly_uint64 nunits
1947 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
1948 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
1949 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1951 vec<constructor_elt, va_gc> *v;
1952 tree part_width = TYPE_SIZE (compute_type);
1953 tree index = bitsize_int (0);
1954 int nunits = nunits_for_known_piecewise_op (arg_type);
1955 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
1956 int i;
1957 location_t loc = gimple_location (gsi_stmt (*gsi));
1959 if (compute_type != arg_type)
1960 warning_at (loc, OPT_Wvector_operation_performance,
1961 "vector operation will be expanded piecewise");
1962 else
1964 nunits = 1;
1965 delta = 1;
1968 vec_alloc (v, (nunits + delta - 1) / delta * 2);
1969 for (i = 0; i < nunits;
1970 i += delta, index = int_const_binop (PLUS_EXPR, index,
1971 part_width))
1973 tree a = arg;
1974 if (compute_type != arg_type)
1975 a = tree_vec_extract (gsi, compute_type, a, part_width,
1976 index);
1977 tree result = gimplify_build1 (gsi, code1, cretd_type, a);
1978 constructor_elt ce = { NULL_TREE, result };
1979 v->quick_push (ce);
1980 ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
1981 v->quick_push (ce);
1984 new_rhs = build_constructor (ret_type, v);
1985 g = gimple_build_assign (lhs, new_rhs);
1986 gsi_replace (gsi, g, false);
1987 return;
1992 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
1993 TREE_TYPE (arg_type), arg,
1994 NULL_TREE, code, ret_type);
1995 g = gimple_build_assign (lhs, new_rhs);
1996 gsi_replace (gsi, g, false);
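/* Editor's illustration of the widening path above, assuming a V4SI -> V4DF
   conversion on a target that provides the unpack optabs for V4SI:
   code1/code2 become VEC_UNPACK_FLOAT_LO_EXPR/VEC_UNPACK_FLOAT_HI_EXPR
   (swapped on big-endian targets), each producing a V2DF from one half of
   the input, and the two halves are reassembled into the V4DF result with a
   CONSTRUCTOR.  */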
1999 /* Process one statement. If we identify a vector operation, expand it. */
2001 static void
2002 expand_vector_operations_1 (gimple_stmt_iterator *gsi,
2003 bitmap dce_ssa_names)
2005 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
2006 enum tree_code code;
2007 optab op = unknown_optab;
2008 enum gimple_rhs_class rhs_class;
2009 tree new_rhs;
2011 /* Only consider GIMPLE_ASSIGN statements; IFN_VEC_CONVERT calls are handled just below. */
2012 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
2013 if (!stmt)
2015 if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
2016 expand_vector_conversion (gsi);
2017 return;
2020 code = gimple_assign_rhs_code (stmt);
2021 rhs_class = get_gimple_rhs_class (code);
2022 lhs = gimple_assign_lhs (stmt);
2024 if (code == VEC_PERM_EXPR)
2026 lower_vec_perm (gsi);
2027 return;
2030 if (code == VEC_COND_EXPR)
2032 expand_vector_condition (gsi, dce_ssa_names);
2033 return;
2036 if (code == COND_EXPR
2037 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2038 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2040 expand_vector_scalar_condition (gsi);
2041 return;
2044 if (code == CONSTRUCTOR
2045 && TREE_CODE (lhs) == SSA_NAME
2046 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2047 && !gimple_clobber_p (stmt)
2048 && optimize)
2050 optimize_vector_constructor (gsi);
2051 return;
2054 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2055 return;
2057 rhs1 = gimple_assign_rhs1 (stmt);
2058 type = gimple_expr_type (stmt);
2059 if (rhs_class == GIMPLE_BINARY_RHS)
2060 rhs2 = gimple_assign_rhs2 (stmt);
2062 if (!VECTOR_TYPE_P (type)
2063 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2064 return;
2066 /* A scalar operation pretending to be a vector one. */
2067 if (VECTOR_BOOLEAN_TYPE_P (type)
2068 && !VECTOR_MODE_P (TYPE_MODE (type))
2069 && TYPE_MODE (type) != BLKmode
2070 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2071 || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2072 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2073 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2074 return;
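/* Editor's note on the early return above, offered as an illustrative
   reading rather than an exhaustive one: a "scalar operation pretending to
   be a vector one" is typically a vector-boolean type whose mode is a
   scalar integer, e.g. on targets with AVX-512-style mask registers a small
   vector of booleans is represented as a QImode bit-mask, and bitwise
   operations on such masks need no lowering here.  */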
2076 /* If the vector operation operates on vectors whose elements are all the
2077 same, implement it as a scalar operation followed by a splat, provided
2078 the target supports the scalar operation. */
2079 tree srhs1, srhs2 = NULL_TREE;
2080 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2081 && (rhs2 == NULL_TREE
2082 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2083 && (srhs2 = rhs2))
2084 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2085 /* As we query direct optabs, restrict this to non-conversion operations. */
2086 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2088 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2089 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2090 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2092 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
2093 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
2094 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
2095 gimple_assign_set_rhs_from_tree (gsi,
2096 build_vector_from_val (type, slhs));
2097 update_stmt (stmt);
2098 return;
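/* Editor's illustration of the splat path above: with

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si f (int x, int y)
     { return (v4si) { x, x, x, x } + (v4si) { y, y, y, y }; }

   the addition can be done once on the scalars and the result broadcast:
   roughly a scalar x + y followed by build_vector_from_val, provided the
   target has a scalar add for the element mode.  */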
2102 if (CONVERT_EXPR_CODE_P (code)
2103 || code == FLOAT_EXPR
2104 || code == FIX_TRUNC_EXPR
2105 || code == VIEW_CONVERT_EXPR)
2106 return;
2108 /* The signedness is determined from the input argument. */
2109 if (code == VEC_UNPACK_FLOAT_HI_EXPR
2110 || code == VEC_UNPACK_FLOAT_LO_EXPR
2111 || code == VEC_PACK_FLOAT_EXPR)
2113 /* We do not know how to scalarize those. */
2114 return;
2117 /* For widening/narrowing vector operations, the relevant type is that of
2118 the arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR is
2119 handled the same way above. */
2120 if (code == WIDEN_SUM_EXPR
2121 || code == VEC_WIDEN_MULT_HI_EXPR
2122 || code == VEC_WIDEN_MULT_LO_EXPR
2123 || code == VEC_WIDEN_MULT_EVEN_EXPR
2124 || code == VEC_WIDEN_MULT_ODD_EXPR
2125 || code == VEC_UNPACK_HI_EXPR
2126 || code == VEC_UNPACK_LO_EXPR
2127 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2128 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2129 || code == VEC_PACK_TRUNC_EXPR
2130 || code == VEC_PACK_SAT_EXPR
2131 || code == VEC_PACK_FIX_TRUNC_EXPR
2132 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2133 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2135 /* We do not know how to scalarize those. */
2136 return;
2139 /* Choose between vector shift/rotate by vector and vector shift/rotate by
2140 scalar. */
2141 if (code == LSHIFT_EXPR
2142 || code == RSHIFT_EXPR
2143 || code == LROTATE_EXPR
2144 || code == RROTATE_EXPR)
2146 optab opv;
2148 /* Check whether we have vector <op> {x,x,x,x} where x
2149 could be a scalar variable or a constant. Transform
2150 vector <op> {x,x,x,x} ==> vector <op> scalar. */
2151 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2153 tree first;
2155 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2157 gimple_assign_set_rhs2 (stmt, first);
2158 update_stmt (stmt);
2159 rhs2 = first;
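/* Editor's illustration of the rewrite just above: for

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si f (v4si a) { return a << (v4si) { 3, 3, 3, 3 }; }

   the shift amount is a uniform vector, so rhs2 is replaced by the scalar 3
   and the statement can then use the vector-shift-by-scalar optab instead
   of the vector-shift-by-vector one.  */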
2163 opv = optab_for_tree_code (code, type, optab_vector);
2164 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2165 op = opv;
2166 else
2168 op = optab_for_tree_code (code, type, optab_scalar);
2170 compute_type = get_compute_type (code, op, type);
2171 if (compute_type == type)
2172 return;
2173 /* The rtl expander will expand vector/scalar as vector/vector
2174 if necessary. Pick the one with the wider vector type. */
2175 tree compute_vtype = get_compute_type (code, opv, type);
2176 if (subparts_gt (compute_vtype, compute_type))
2178 compute_type = compute_vtype;
2179 op = opv;
2183 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2185 if (compute_type == NULL_TREE)
2186 compute_type = get_compute_type (code, op, type);
2187 if (compute_type == type)
2188 return;
2189 /* Before splitting vector rotates into scalar rotates,
2190 see whether we can use vector shifts and BIT_IOR_EXPR
2191 instead. For vector-by-vector rotates we'd also need to
2192 check BIT_AND_EXPR and NEGATE_EXPR; punt on those for now,
2193 since fold doesn't seem to create such rotates anyway. */
2194 if (compute_type == TREE_TYPE (type)
2195 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2197 optab oplv = vashl_optab, opl = ashl_optab;
2198 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2199 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2200 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2201 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2202 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2203 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2204 /* The rtl expander will expand vector/scalar as vector/vector
2205 if necessary. Pick the one with the wider vector type. */
2206 if (subparts_gt (compute_lvtype, compute_ltype))
2208 compute_ltype = compute_lvtype;
2209 opl = oplv;
2211 if (subparts_gt (compute_rvtype, compute_rtype))
2213 compute_rtype = compute_rvtype;
2214 opr = oprv;
2216 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2217 BIT_IOR_EXPR. */
2218 compute_type = compute_ltype;
2219 if (subparts_gt (compute_type, compute_rtype))
2220 compute_type = compute_rtype;
2221 if (subparts_gt (compute_type, compute_otype))
2222 compute_type = compute_otype;
2223 /* Verify that all three operations can be performed in that type. */
2224 if (compute_type != TREE_TYPE (type))
2226 if (optab_handler (opl, TYPE_MODE (compute_type))
2227 == CODE_FOR_nothing
2228 || optab_handler (opr, TYPE_MODE (compute_type))
2229 == CODE_FOR_nothing
2230 || optab_handler (opo, TYPE_MODE (compute_type))
2231 == CODE_FOR_nothing)
2232 compute_type = TREE_TYPE (type);
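/* Editor's note on the rotate handling above: the point of checking the
   shift and BIT_IOR_EXPR optabs is that a rotate can always be rewritten
   with the usual identity

     (x << n) | (x >> (element_precision - n))

   so if some vector type supports all three operations, the rotate is kept
   (or split) at that vector width rather than scalarised element by
   element.  */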
2237 else
2238 op = optab_for_tree_code (code, type, optab_default);
2240 /* Optabs will try converting a negation into a subtraction, so
2241 look for it as well. TODO: negation of floating-point vectors
2242 might be turned into an exclusive OR toggling the sign bit. */
2243 if (op == unknown_optab
2244 && code == NEGATE_EXPR
2245 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2246 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
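/* Editor's illustration for the fallback above: when no vector negation
   optab exists, -x can be computed as 0 - x, so the MINUS_EXPR optab is an
   acceptable stand-in when choosing the compute type for an integral
   vector NEGATE_EXPR.  */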
2248 if (compute_type == NULL_TREE)
2249 compute_type = get_compute_type (code, op, type);
2250 if (compute_type == type)
2251 return;
2253 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
2254 dce_ssa_names);
2256 /* Leave expression untouched for later expansion. */
2257 if (new_rhs == NULL_TREE)
2258 return;
2260 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2261 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2262 new_rhs);
2264 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2265 way to do it is change expand_vector_operation and its callees to
2266 return a tree_code, RHS1 and RHS2 instead of a tree. */
2267 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2268 update_stmt (gsi_stmt (*gsi));
2271 /* Use this to lower vector operations introduced by the vectorizer,
2272 if they may need the bit-twiddling tricks implemented in this file. */
2274 static unsigned int
2275 expand_vector_operations (void)
2277 gimple_stmt_iterator gsi;
2278 basic_block bb;
2279 bool cfg_changed = false;
2281 auto_bitmap dce_ssa_names;
2283 FOR_EACH_BB_FN (bb, cfun)
2285 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2287 expand_vector_operations_1 (&gsi, dce_ssa_names);
2288 /* ??? If we do not clean up EH then we will ICE during
2289 verification. But in reality we have created wrong code,
2290 since we did not properly transition the EH info and edges
2291 to the piecewise computations. */
2292 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2293 && gimple_purge_dead_eh_edges (bb))
2294 cfg_changed = true;
2298 simple_dce_from_worklist (dce_ssa_names);
2300 return cfg_changed ? TODO_cleanup_cfg : 0;
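/* Editor's illustration of the overall effect, assuming a target with no
   usable vector unit: a generic-vector addition such as

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si add (v4si a, v4si b) { return a + b; }

   is rewritten by this pass into scalar additions gathered back with a
   CONSTRUCTOR, roughly
   { a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3] },
   and -Wvector-operation-performance can warn that the operation was
   expanded piecewise.  */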
2303 namespace {
2305 const pass_data pass_data_lower_vector =
2307 GIMPLE_PASS, /* type */
2308 "veclower", /* name */
2309 OPTGROUP_VEC, /* optinfo_flags */
2310 TV_NONE, /* tv_id */
2311 PROP_cfg, /* properties_required */
2312 PROP_gimple_lvec, /* properties_provided */
2313 0, /* properties_destroyed */
2314 0, /* todo_flags_start */
2315 TODO_update_ssa, /* todo_flags_finish */
2318 class pass_lower_vector : public gimple_opt_pass
2320 public:
2321 pass_lower_vector (gcc::context *ctxt)
2322 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2325 /* opt_pass methods: */
2326 virtual bool gate (function *fun)
2328 return !(fun->curr_properties & PROP_gimple_lvec);
2331 virtual unsigned int execute (function *)
2333 return expand_vector_operations ();
2336 }; // class pass_lower_vector
2338 } // anon namespace
2340 gimple_opt_pass *
2341 make_pass_lower_vector (gcc::context *ctxt)
2343 return new pass_lower_vector (ctxt);
2346 namespace {
2348 const pass_data pass_data_lower_vector_ssa =
2350 GIMPLE_PASS, /* type */
2351 "veclower2", /* name */
2352 OPTGROUP_VEC, /* optinfo_flags */
2353 TV_NONE, /* tv_id */
2354 PROP_cfg, /* properties_required */
2355 PROP_gimple_lvec, /* properties_provided */
2356 0, /* properties_destroyed */
2357 0, /* todo_flags_start */
2358 ( TODO_update_ssa
2359 | TODO_cleanup_cfg ), /* todo_flags_finish */
2362 class pass_lower_vector_ssa : public gimple_opt_pass
2364 public:
2365 pass_lower_vector_ssa (gcc::context *ctxt)
2366 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2369 /* opt_pass methods: */
2370 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2371 virtual unsigned int execute (function *)
2373 return expand_vector_operations ();
2376 }; // class pass_lower_vector_ssa
2378 } // anon namespace
2380 gimple_opt_pass *
2381 make_pass_lower_vector_ssa (gcc::context *ctxt)
2383 return new pass_lower_vector_ssa (ctxt);
2386 #include "gt-tree-vect-generic.h"