gcc/tree-vect-generic.c
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "recog.h" /* FIXME: for insn_data */
47 static void expand_vector_operations_1 (gimple_stmt_iterator *, auto_bitmap *);
49 /* Return the number of elements in a vector type TYPE that we have
50 already decided needs to be expanded piecewise. We don't support
51 this kind of expansion for variable-length vectors, since we should
52 always check for target support before introducing uses of those. */
53 static unsigned int
54 nunits_for_known_piecewise_op (const_tree type)
56 return TYPE_VECTOR_SUBPARTS (type).to_constant ();
59 /* Return true if TYPE1 has more elements than TYPE2, where either
60 type may be a vector or a scalar. */
62 static inline bool
63 subparts_gt (tree type1, tree type2)
65 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
66 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
67 return known_gt (n1, n2);
70 /* Build a constant of type TYPE, made of VALUE's bits replicated
71 every WIDTH bits to fit TYPE's precision. */
72 static tree
73 build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
75 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
76 / HOST_BITS_PER_WIDE_INT;
77 unsigned HOST_WIDE_INT low, mask;
78 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
79 int i;
81 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
83 if (width == HOST_BITS_PER_WIDE_INT)
84 low = value;
85 else
87 mask = ((HOST_WIDE_INT)1 << width) - 1;
88 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
91 for (i = 0; i < n; i++)
92 a[i] = low;
94 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
95 return wide_int_to_tree
96 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
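/* Illustrative sketch, not used by the pass: the core of the routine
   above is the "~0 / mask * value" idiom, which copies a WIDTH-bit
   VALUE into every WIDTH-bit lane of a host word.  A standalone
   version (the name example_replicate_bits is made up here, and WIDTH
   is assumed to divide 64 and to be smaller than 64):  */
static unsigned long long
example_replicate_bits (unsigned int width, unsigned long long value)
{
  /* MASK selects one lane; ~0 / MASK has a 1 in the low bit of every
     lane (e.g. 0x0101010101010101 for 8-bit lanes), so the multiply
     broadcasts VALUE into each lane.  */
  unsigned long long mask = (1ULL << width) - 1;
  return ~0ULL / mask * (value & mask);
}
/* For instance, example_replicate_bits (8, 0x7f) == 0x7f7f7f7f7f7f7f7f,
   the kind of low-bits constant that do_plus_minus below builds.  */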
99 static GTY(()) tree vector_inner_type;
100 static GTY(()) tree vector_last_type;
101 static GTY(()) int vector_last_nunits;
103 /* Return a suitable vector type made of NUNITS units each of mode
104 "word_mode" (the global variable). */
105 static tree
106 build_word_mode_vector_type (int nunits)
108 if (!vector_inner_type)
109 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
110 else if (vector_last_nunits == nunits)
112 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
113 return vector_last_type;
116 vector_last_nunits = nunits;
117 vector_last_type = build_vector_type (vector_inner_type, nunits);
118 return vector_last_type;
121 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
122 tree, tree, tree, tree, tree, enum tree_code,
123 tree);
125 tree
126 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
127 tree t, tree bitsize, tree bitpos)
129 if (TREE_CODE (t) == SSA_NAME)
131 gimple *def_stmt = SSA_NAME_DEF_STMT (t);
132 if (is_gimple_assign (def_stmt)
133 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
134 || (bitpos
135 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
136 t = gimple_assign_rhs1 (def_stmt);
138 if (bitpos)
140 if (TREE_CODE (type) == BOOLEAN_TYPE)
142 tree itype
143 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
144 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
145 bitsize, bitpos);
146 return gimplify_build2 (gsi, NE_EXPR, type, field,
147 build_zero_cst (itype));
149 else
150 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
152 else
153 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
156 static tree
157 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
158 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
159 enum tree_code code, tree type ATTRIBUTE_UNUSED)
161 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
162 return gimplify_build1 (gsi, code, inner_type, a);
165 static tree
166 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
167 tree bitpos, tree bitsize, enum tree_code code,
168 tree type ATTRIBUTE_UNUSED)
170 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
171 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
172 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
173 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
174 return gimplify_build2 (gsi, code, inner_type, a, b);
177 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
179 INNER_TYPE is the type of the elements of A and B.  The
181 returned expression is of signed integer type with the
182 size equal to the size of INNER_TYPE. */
183 static tree
184 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
185 tree bitpos, tree bitsize, enum tree_code code, tree type)
187 tree stype = TREE_TYPE (type);
188 tree cst_false = build_zero_cst (stype);
189 tree cst_true = build_all_ones_cst (stype);
190 tree cmp;
192 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
193 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
195 cmp = build2 (code, boolean_type_node, a, b);
196 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
199 /* Expand vector addition to scalars. This does bit twiddling
200 in order to increase parallelism:
202 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
203 (a ^ b) & 0x80808080
205 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
206 (a ^ ~b) & 0x80808080
208 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
210 This optimization should be done only if 4 vector items or more
211 fit into a word. */
212 static tree
213 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
214 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
215 enum tree_code code, tree type ATTRIBUTE_UNUSED)
217 unsigned int width = vector_element_bits (TREE_TYPE (a));
218 tree inner_type = TREE_TYPE (TREE_TYPE (a));
219 unsigned HOST_WIDE_INT max;
220 tree low_bits, high_bits, a_low, b_low, result_low, signs;
222 max = GET_MODE_MASK (TYPE_MODE (inner_type));
223 low_bits = build_replicated_const (word_type, width, max >> 1);
224 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
226 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
227 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
229 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
230 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
231 if (code == PLUS_EXPR)
232 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
233 else
235 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
236 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
239 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
240 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
241 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
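/* Illustrative sketch, not part of the pass: the formulas from the
   comment above do_plus_minus, written out for four 8-bit lanes packed
   into a 32-bit word (the example_* names are made up).  The low seven
   bits of each lane are added without carrying into the neighbouring
   lane, and the top bit of each lane is then fixed up with XOR, so no
   carry ever crosses a lane boundary.  */
static unsigned int
example_word_add_v4qi (unsigned int a, unsigned int b)
{
  unsigned int low = (a & 0x7f7f7f7f) + (b & 0x7f7f7f7f);
  return low ^ ((a ^ b) & 0x80808080);
}
static unsigned int
example_word_sub_v4qi (unsigned int a, unsigned int b)
{
  /* ORing the sign bit into A makes every lane-wise difference
     non-negative, so no borrow propagates between lanes either.  */
  unsigned int low = (a | 0x80808080) - (b & 0x7f7f7f7f);
  return low ^ ((a ^ ~b) & 0x80808080);
}
/* e.g. example_word_add_v4qi (0x01ff7f00, 0x01017f00) == 0x0200fe00,
   the same result as adding the four byte lanes independently.  */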
244 static tree
245 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
246 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
247 tree bitsize ATTRIBUTE_UNUSED,
248 enum tree_code code ATTRIBUTE_UNUSED,
249 tree type ATTRIBUTE_UNUSED)
251 unsigned int width = vector_element_bits (TREE_TYPE (b));
252 tree inner_type = TREE_TYPE (TREE_TYPE (b));
253 HOST_WIDE_INT max;
254 tree low_bits, high_bits, b_low, result_low, signs;
256 max = GET_MODE_MASK (TYPE_MODE (inner_type));
257 low_bits = build_replicated_const (word_type, width, max >> 1);
258 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
260 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
262 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
263 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
264 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
265 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
266 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
269 /* Expand a vector operation to scalars, by using many operations
270 whose type is the vector type's inner type. */
271 static tree
272 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
273 tree type, tree inner_type,
274 tree a, tree b, enum tree_code code,
275 tree ret_type = NULL_TREE)
277 vec<constructor_elt, va_gc> *v;
278 tree part_width = TYPE_SIZE (inner_type);
279 tree index = bitsize_int (0);
280 int nunits = nunits_for_known_piecewise_op (type);
281 int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
282 int i;
283 location_t loc = gimple_location (gsi_stmt (*gsi));
285 if (ret_type
286 || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
287 warning_at (loc, OPT_Wvector_operation_performance,
288 "vector operation will be expanded piecewise");
289 else
290 warning_at (loc, OPT_Wvector_operation_performance,
291 "vector operation will be expanded in parallel");
293 if (!ret_type)
294 ret_type = type;
295 vec_alloc (v, (nunits + delta - 1) / delta);
296 for (i = 0; i < nunits;
297 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
299 tree result = f (gsi, inner_type, a, b, index, part_width, code,
300 ret_type);
301 constructor_elt ce = {NULL_TREE, result};
302 v->quick_push (ce);
305 return build_constructor (ret_type, v);
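/* Illustrative sketch, not part of the pass: at the source level, the
   piecewise expansion above turns a vector operation into one scalar
   operation per element whose results are collected back into a
   CONSTRUCTOR.  Semantically that is the same as the GNU C loop below
   (the example_* names are made up for illustration).  */
typedef int example_piecewise_v4si __attribute__ ((vector_size (16)));
static example_piecewise_v4si
example_piecewise_add (example_piecewise_v4si a, example_piecewise_v4si b)
{
  example_piecewise_v4si r;
  for (int i = 0; i < 4; i++)
    r[i] = a[i] + b[i];	/* one elem_op_func call per element */
  return r;
}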
308 /* Expand a vector operation to scalars with the freedom to use
309 a scalar integer type, or to use a different size for the items
310 in the vector type. */
311 static tree
312 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
313 tree a, tree b, enum tree_code code)
315 tree result, compute_type;
316 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
317 location_t loc = gimple_location (gsi_stmt (*gsi));
319 /* We have three strategies. If the type is already correct, just do
320 the operation an element at a time. Else, if the vector is wider than
321 one word, do it a word at a time; finally, if the vector is smaller
322 than one word, do it as a scalar. */
323 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
324 return expand_vector_piecewise (gsi, f,
325 type, TREE_TYPE (type),
326 a, b, code);
327 else if (n_words > 1)
329 tree word_type = build_word_mode_vector_type (n_words);
330 result = expand_vector_piecewise (gsi, f,
331 word_type, TREE_TYPE (word_type),
332 a, b, code);
333 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
334 GSI_SAME_STMT);
336 else
338 /* Use a single scalar operation with a mode no wider than word_mode. */
339 scalar_int_mode mode
340 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
341 compute_type = lang_hooks.types.type_for_mode (mode, 1);
342 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
343 warning_at (loc, OPT_Wvector_operation_performance,
344 "vector operation will be expanded with a "
345 "single scalar operation");
348 return result;
351 /* Expand a vector operation to scalars; for integer types we can use
352 special bit twiddling tricks to do the sums a word at a time, using
353 function F_PARALLEL instead of F. These tricks are done only if
354 they can process at least four items, that is, only if the vector
355 holds at least four items and if a word can hold four items. */
356 static tree
357 expand_vector_addition (gimple_stmt_iterator *gsi,
358 elem_op_func f, elem_op_func f_parallel,
359 tree type, tree a, tree b, enum tree_code code)
361 int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
363 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
364 && parts_per_word >= 4
365 && nunits_for_known_piecewise_op (type) >= 4)
366 return expand_vector_parallel (gsi, f_parallel,
367 type, a, b, code);
368 else
369 return expand_vector_piecewise (gsi, f,
370 type, TREE_TYPE (type),
371 a, b, code);
374 /* Try to expand vector comparison expression OP0 CODE OP1 by
375 checking via the optab whether the following expression:
376 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
377 can be expanded. */
378 static tree
379 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
380 tree op1, enum tree_code code)
382 tree t;
383 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
384 && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
386 if (VECTOR_BOOLEAN_TYPE_P (type)
387 && SCALAR_INT_MODE_P (TYPE_MODE (type))
388 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
389 TYPE_VECTOR_SUBPARTS (type)
390 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
391 (TREE_TYPE (type)))))
393 tree inner_type = TREE_TYPE (TREE_TYPE (op0));
394 tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
395 tree index = bitsize_int (0);
396 int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
397 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
398 tree ret_type = build_nonstandard_integer_type (prec, 1);
399 tree ret_inner_type = boolean_type_node;
400 int i;
401 location_t loc = gimple_location (gsi_stmt (*gsi));
402 t = build_zero_cst (ret_type);
404 if (TYPE_PRECISION (ret_inner_type) != 1)
405 ret_inner_type = build_nonstandard_integer_type (1, 1);
406 warning_at (loc, OPT_Wvector_operation_performance,
407 "vector operation will be expanded piecewise");
408 for (i = 0; i < nunits;
409 i++, index = int_const_binop (PLUS_EXPR, index, part_width))
411 tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
412 index);
413 tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
414 index);
415 tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
416 t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
417 bitsize_int (i));
419 t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
421 else
422 t = expand_vector_piecewise (gsi, do_compare, type,
423 TREE_TYPE (TREE_TYPE (op0)), op0, op1,
424 code);
426 else
427 t = NULL_TREE;
429 return t;
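/* Illustrative sketch, not part of the pass: in GNU C a vector
   comparison already has the semantics that do_compare reproduces
   element by element, namely -1 in every lane where the comparison
   holds and 0 elsewhere (the example_* names are made up).  */
typedef int example_cmp_v4si __attribute__ ((vector_size (16)));
static example_cmp_v4si
example_vector_lt (example_cmp_v4si a, example_cmp_v4si b)
{
  /* e.g. {1,5,3,7} < {2,2,9,7} yields {-1,0,-1,0}.  */
  return a < b;
}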
432 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
433 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
434 the result if successful, otherwise return NULL_TREE. */
435 static tree
436 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
438 optab op;
439 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
440 bool scalar_shift = true;
442 for (i = 1; i < nunits; i++)
444 if (shiftcnts[i] != shiftcnts[0])
445 scalar_shift = false;
448 if (scalar_shift && shiftcnts[0] == 0)
449 return op0;
451 if (scalar_shift)
453 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
454 if (op != unknown_optab
455 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
456 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
457 build_int_cst (NULL_TREE, shiftcnts[0]));
460 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
461 if (op != unknown_optab
462 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
464 tree_vector_builder vec (type, nunits, 1);
465 for (i = 0; i < nunits; i++)
466 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
467 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
470 return NULL_TREE;
473 /* Try to expand integer vector division by constant using
474 widening multiply, shifts and additions. */
475 static tree
476 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
477 tree op1, enum tree_code code)
479 bool use_pow2 = true;
480 bool has_vector_shift = true;
481 bool use_abs_op1 = false;
482 int mode = -1, this_mode;
483 int pre_shift = -1, post_shift;
484 unsigned int nunits = nunits_for_known_piecewise_op (type);
485 int *shifts = XALLOCAVEC (int, nunits * 4);
486 int *pre_shifts = shifts + nunits;
487 int *post_shifts = pre_shifts + nunits;
488 int *shift_temps = post_shifts + nunits;
489 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
490 int prec = TYPE_PRECISION (TREE_TYPE (type));
491 int dummy_int;
492 unsigned int i;
493 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
494 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
495 tree cur_op, mulcst, tem;
496 optab op;
498 if (prec > HOST_BITS_PER_WIDE_INT)
499 return NULL_TREE;
501 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
502 if (op == unknown_optab
503 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
504 has_vector_shift = false;
506 /* Analysis phase.  Determine whether all op1 elements are powers of
507 two and whether it is possible to expand the operation using shifts
508 (or, for remainder, using masking).  Additionally compute the
509 multiplicative constants and pre and post shifts if the division is
510 to be expanded using widening or high part multiplication plus shifts. */
511 for (i = 0; i < nunits; i++)
513 tree cst = VECTOR_CST_ELT (op1, i);
514 unsigned HOST_WIDE_INT ml;
516 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
517 return NULL_TREE;
518 pre_shifts[i] = 0;
519 post_shifts[i] = 0;
520 mulc[i] = 0;
521 if (use_pow2
522 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
523 use_pow2 = false;
524 if (use_pow2)
526 shifts[i] = tree_log2 (cst);
527 if (shifts[i] != shifts[0]
528 && code == TRUNC_DIV_EXPR
529 && !has_vector_shift)
530 use_pow2 = false;
532 if (mode == -2)
533 continue;
534 if (sign_p == UNSIGNED)
536 unsigned HOST_WIDE_INT mh;
537 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
539 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
540 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
541 return NULL_TREE;
543 if (d <= 1)
545 mode = -2;
546 continue;
549 /* Find a suitable multiplier and right shift count
550 to use instead of dividing by D. */
551 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
553 /* If the suggested multiplier is more than SIZE bits, we can
554 do better for even divisors, using an initial right shift. */
555 if ((mh != 0 && (d & 1) == 0)
556 || (!has_vector_shift && pre_shift != -1))
558 if (has_vector_shift)
559 pre_shift = ctz_or_zero (d);
560 else if (pre_shift == -1)
562 unsigned int j;
563 for (j = 0; j < nunits; j++)
565 tree cst2 = VECTOR_CST_ELT (op1, j);
566 unsigned HOST_WIDE_INT d2;
567 int this_pre_shift;
569 if (!tree_fits_uhwi_p (cst2))
570 return NULL_TREE;
571 d2 = tree_to_uhwi (cst2) & mask;
572 if (d2 == 0)
573 return NULL_TREE;
574 this_pre_shift = floor_log2 (d2 & -d2);
575 if (pre_shift == -1 || this_pre_shift < pre_shift)
576 pre_shift = this_pre_shift;
578 if (i != 0 && pre_shift != 0)
580 /* Restart. */
581 i = -1U;
582 mode = -1;
583 continue;
586 if (pre_shift != 0)
588 if ((d >> pre_shift) <= 1)
590 mode = -2;
591 continue;
593 mh = choose_multiplier (d >> pre_shift, prec,
594 prec - pre_shift,
595 &ml, &post_shift, &dummy_int);
596 gcc_assert (!mh);
597 pre_shifts[i] = pre_shift;
600 if (!mh)
601 this_mode = 0;
602 else
603 this_mode = 1;
605 else
607 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
608 unsigned HOST_WIDE_INT abs_d;
610 if (d == -1)
611 return NULL_TREE;
613 /* Since d might be INT_MIN, we have to cast to
614 unsigned HOST_WIDE_INT before negating to avoid
615 undefined signed overflow. */
616 abs_d = (d >= 0
617 ? (unsigned HOST_WIDE_INT) d
618 : - (unsigned HOST_WIDE_INT) d);
620 /* n rem d = n rem -d */
621 if (code == TRUNC_MOD_EXPR && d < 0)
623 d = abs_d;
624 use_abs_op1 = true;
626 if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
628 /* This case is not handled correctly below. */
629 mode = -2;
630 continue;
632 if (abs_d <= 1)
634 mode = -2;
635 continue;
638 choose_multiplier (abs_d, prec, prec - 1, &ml,
639 &post_shift, &dummy_int);
640 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
642 this_mode = 4 + (d < 0);
643 ml |= HOST_WIDE_INT_M1U << (prec - 1);
645 else
646 this_mode = 2 + (d < 0);
648 mulc[i] = ml;
649 post_shifts[i] = post_shift;
650 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
651 || post_shift >= prec
652 || pre_shifts[i] >= prec)
653 this_mode = -2;
655 if (i == 0)
656 mode = this_mode;
657 else if (mode != this_mode)
658 mode = -2;
661 if (use_pow2)
663 tree addend = NULL_TREE;
664 if (sign_p == SIGNED)
666 tree uns_type;
668 /* Both division and remainder sequences need
669 op0 < 0 ? mask : 0 computed. It can be either computed as
670 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
671 if none of the shifts is 0, or as the conditional. */
672 for (i = 0; i < nunits; i++)
673 if (shifts[i] == 0)
674 break;
675 uns_type
676 = build_vector_type (build_nonstandard_integer_type (prec, 1),
677 nunits);
678 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
680 for (i = 0; i < nunits; i++)
681 shift_temps[i] = prec - 1;
682 cur_op = add_rshift (gsi, type, op0, shift_temps);
683 if (cur_op != NULL_TREE)
685 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
686 uns_type, cur_op);
687 for (i = 0; i < nunits; i++)
688 shift_temps[i] = prec - shifts[i];
689 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
690 if (cur_op != NULL_TREE)
691 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
692 type, cur_op);
695 if (addend == NULL_TREE
696 && expand_vec_cond_expr_p (type, type, LT_EXPR))
698 tree zero, cst, mask_type, mask;
699 gimple *stmt, *cond;
701 mask_type = truth_type_for (type);
702 zero = build_zero_cst (type);
703 mask = make_ssa_name (mask_type);
704 cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
705 gsi_insert_before (gsi, cond, GSI_SAME_STMT);
706 tree_vector_builder vec (type, nunits, 1);
707 for (i = 0; i < nunits; i++)
708 vec.quick_push (build_int_cst (TREE_TYPE (type),
709 (HOST_WIDE_INT_1U
710 << shifts[i]) - 1));
711 cst = vec.build ();
712 addend = make_ssa_name (type);
713 stmt
714 = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
715 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
718 if (code == TRUNC_DIV_EXPR)
720 if (sign_p == UNSIGNED)
722 /* q = op0 >> shift; */
723 cur_op = add_rshift (gsi, type, op0, shifts);
724 if (cur_op != NULL_TREE)
725 return cur_op;
727 else if (addend != NULL_TREE)
729 /* t1 = op0 + addend;
730 q = t1 >> shift; */
731 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
732 if (op != unknown_optab
733 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
735 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
736 cur_op = add_rshift (gsi, type, cur_op, shifts);
737 if (cur_op != NULL_TREE)
738 return cur_op;
742 else
744 tree mask;
745 tree_vector_builder vec (type, nunits, 1);
746 for (i = 0; i < nunits; i++)
747 vec.quick_push (build_int_cst (TREE_TYPE (type),
748 (HOST_WIDE_INT_1U
749 << shifts[i]) - 1));
750 mask = vec.build ();
751 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
752 if (op != unknown_optab
753 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
755 if (sign_p == UNSIGNED)
756 /* r = op0 & mask; */
757 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
758 else if (addend != NULL_TREE)
760 /* t1 = op0 + addend;
761 t2 = t1 & mask;
762 r = t2 - addend; */
763 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
764 if (op != unknown_optab
765 && optab_handler (op, TYPE_MODE (type))
766 != CODE_FOR_nothing)
768 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
769 addend);
770 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
771 cur_op, mask);
772 op = optab_for_tree_code (MINUS_EXPR, type,
773 optab_default);
774 if (op != unknown_optab
775 && optab_handler (op, TYPE_MODE (type))
776 != CODE_FOR_nothing)
777 return gimplify_build2 (gsi, MINUS_EXPR, type,
778 cur_op, addend);
785 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
786 return NULL_TREE;
788 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
789 return NULL_TREE;
791 cur_op = op0;
793 switch (mode)
795 case 0:
796 gcc_assert (sign_p == UNSIGNED);
797 /* t1 = oprnd0 >> pre_shift;
798 t2 = t1 h* ml;
799 q = t2 >> post_shift; */
800 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
801 if (cur_op == NULL_TREE)
802 return NULL_TREE;
803 break;
804 case 1:
805 gcc_assert (sign_p == UNSIGNED);
806 for (i = 0; i < nunits; i++)
808 shift_temps[i] = 1;
809 post_shifts[i]--;
811 break;
812 case 2:
813 case 3:
814 case 4:
815 case 5:
816 gcc_assert (sign_p == SIGNED);
817 for (i = 0; i < nunits; i++)
818 shift_temps[i] = prec - 1;
819 break;
820 default:
821 return NULL_TREE;
824 tree_vector_builder vec (type, nunits, 1);
825 for (i = 0; i < nunits; i++)
826 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
827 mulcst = vec.build ();
829 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
831 switch (mode)
833 case 0:
834 /* t1 = oprnd0 >> pre_shift;
835 t2 = t1 h* ml;
836 q = t2 >> post_shift; */
837 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
838 break;
839 case 1:
840 /* t1 = oprnd0 h* ml;
841 t2 = oprnd0 - t1;
842 t3 = t2 >> 1;
843 t4 = t1 + t3;
844 q = t4 >> (post_shift - 1); */
845 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
846 if (op == unknown_optab
847 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
848 return NULL_TREE;
849 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
850 tem = add_rshift (gsi, type, tem, shift_temps);
851 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
852 if (op == unknown_optab
853 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
854 return NULL_TREE;
855 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
856 cur_op = add_rshift (gsi, type, tem, post_shifts);
857 if (cur_op == NULL_TREE)
858 return NULL_TREE;
859 break;
860 case 2:
861 case 3:
862 case 4:
863 case 5:
864 /* t1 = oprnd0 h* ml;
865 t2 = t1; [ iff (mode & 2) != 0 ]
866 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
867 t3 = t2 >> post_shift;
868 t4 = oprnd0 >> (prec - 1);
869 q = t3 - t4; [ iff (mode & 1) == 0 ]
870 q = t4 - t3; [ iff (mode & 1) != 0 ] */
871 if ((mode & 2) == 0)
873 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
874 if (op == unknown_optab
875 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
876 return NULL_TREE;
877 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
879 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
880 if (cur_op == NULL_TREE)
881 return NULL_TREE;
882 tem = add_rshift (gsi, type, op0, shift_temps);
883 if (tem == NULL_TREE)
884 return NULL_TREE;
885 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
886 if (op == unknown_optab
887 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
888 return NULL_TREE;
889 if ((mode & 1) == 0)
890 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
891 else
892 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
893 break;
894 default:
895 gcc_unreachable ();
898 if (code == TRUNC_DIV_EXPR)
899 return cur_op;
901 /* We divided. Now finish by:
902 t1 = q * oprnd1;
903 r = oprnd0 - t1; */
904 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
905 if (op == unknown_optab
906 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
907 return NULL_TREE;
908 if (use_abs_op1)
910 tree_vector_builder elts;
911 if (!elts.new_unary_operation (type, op1, false))
912 return NULL_TREE;
913 unsigned int count = elts.encoded_nelts ();
914 for (unsigned int i = 0; i < count; ++i)
916 tree elem1 = VECTOR_CST_ELT (op1, i);
918 tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
919 if (elt == NULL_TREE)
920 return NULL_TREE;
921 elts.quick_push (elt);
923 op1 = elts.build ();
925 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
926 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
927 if (op == unknown_optab
928 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
929 return NULL_TREE;
930 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
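/* Illustrative sketch, not part of the pass: the "mode 1" sequence
   from the switch above, written out for a single unsigned 32-bit lane
   divided by 7.  For this divisor choose_multiplier yields a magic
   multiplier of 0x24924925 with post_shift == 3; those two constants
   are hard-coded here purely for illustration, and a 32-bit unsigned
   int plus arithmetic right shifts of signed values are assumed.  */
static unsigned int
example_udiv_by_7 (unsigned int n)
{
  unsigned int t1 = ((unsigned long long) n * 0x24924925u) >> 32; /* n h* ml */
  unsigned int t2 = n - t1;
  unsigned int t3 = t2 >> 1;
  unsigned int t4 = t1 + t3;
  return t4 >> 2;	/* post_shift - 1 */
}
/* The power-of-two path for a SIGNED divisor first adds the mask
   (divisor - 1) to negative operands so that the right shift truncates
   towards zero, e.g. for division by 4:  */
static int
example_sdiv_by_4 (int x)
{
  int addend = (int) (((unsigned int) (x >> 31)) >> 30);	/* x < 0 ? 3 : 0 */
  return (x + addend) >> 2;
}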
933 /* Expand a vector condition to scalars, by using many conditions
934 on the vector's elements. */
935 static void
936 expand_vector_condition (gimple_stmt_iterator *gsi, auto_bitmap *dce_ssa_names)
938 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
939 tree type = gimple_expr_type (stmt);
940 tree a = gimple_assign_rhs1 (stmt);
941 tree a1 = a;
942 tree a2 = NULL_TREE;
943 bool a_is_comparison = false;
944 bool a_is_scalar_bitmask = false;
945 tree b = gimple_assign_rhs2 (stmt);
946 tree c = gimple_assign_rhs3 (stmt);
947 vec<constructor_elt, va_gc> *v;
948 tree constr;
949 tree inner_type = TREE_TYPE (type);
950 tree width = vector_element_bits_tree (type);
951 tree cond_type = TREE_TYPE (TREE_TYPE (a));
952 tree comp_inner_type = cond_type;
953 tree index = bitsize_int (0);
954 tree comp_width = width;
955 tree comp_index = index;
956 location_t loc = gimple_location (gsi_stmt (*gsi));
957 tree_code code = TREE_CODE (a);
958 gassign *assign = NULL;
960 if (code == SSA_NAME)
962 assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
963 if (assign != NULL
964 && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
966 a_is_comparison = true;
967 a1 = gimple_assign_rhs1 (assign);
968 a2 = gimple_assign_rhs2 (assign);
969 code = gimple_assign_rhs_code (assign);
970 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
971 comp_width = vector_element_bits_tree (TREE_TYPE (a1));
975 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
977 gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
978 return;
981 /* Handle vector boolean types with bitmasks. If there is a comparison
982 and we can expand the comparison into the vector boolean bitmask,
983 or otherwise if it is compatible with type, we can transform
984 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
985 into
986 tmp_6 = x_2 < y_3;
987 tmp_7 = tmp_6 & vbfld_4;
988 tmp_8 = ~tmp_6;
989 tmp_9 = tmp_8 & vbfld_5;
990 vbfld_1 = tmp_7 | tmp_9;
991 Similarly for vbfld_10 instead of x_2 < y_3. */
992 if (VECTOR_BOOLEAN_TYPE_P (type)
993 && SCALAR_INT_MODE_P (TYPE_MODE (type))
994 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
995 TYPE_VECTOR_SUBPARTS (type)
996 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
997 && (a_is_comparison
998 ? useless_type_conversion_p (type, TREE_TYPE (a))
999 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1001 if (a_is_comparison)
1002 a = gimplify_build2 (gsi, code, type, a1, a2);
1003 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1004 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1005 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1006 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1007 gimple_assign_set_rhs_from_tree (gsi, a);
1008 update_stmt (gsi_stmt (*gsi));
1009 return;
1012 /* TODO: try and find a smaller vector type. */
1014 warning_at (loc, OPT_Wvector_operation_performance,
1015 "vector condition will be expanded piecewise");
1017 if (!a_is_comparison
1018 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1019 && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1020 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1021 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1022 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1023 (TREE_TYPE (TREE_TYPE (a))))))
1025 a_is_scalar_bitmask = true;
1026 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1027 tree atype = build_nonstandard_integer_type (prec, 1);
1028 a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1031 int nunits = nunits_for_known_piecewise_op (type);
1032 vec_alloc (v, nunits);
1033 for (int i = 0; i < nunits; i++)
1035 tree aa, result;
1036 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1037 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1038 if (a_is_comparison)
1040 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1041 comp_width, comp_index);
1042 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1043 comp_width, comp_index);
1044 aa = fold_build2 (code, cond_type, aa1, aa2);
1046 else if (a_is_scalar_bitmask)
1048 wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1049 result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1050 a, wide_int_to_tree (TREE_TYPE (a), w));
1051 aa = fold_build2 (NE_EXPR, boolean_type_node, result,
1052 build_zero_cst (TREE_TYPE (a)));
1054 else
1055 aa = tree_vec_extract (gsi, cond_type, a, width, index);
1056 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1057 constructor_elt ce = {NULL_TREE, result};
1058 v->quick_push (ce);
1059 index = int_const_binop (PLUS_EXPR, index, width);
1060 if (width == comp_width)
1061 comp_index = index;
1062 else
1063 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1066 constr = build_constructor (type, v);
1067 gimple_assign_set_rhs_from_tree (gsi, constr);
1068 update_stmt (gsi_stmt (*gsi));
1070 if (a_is_comparison)
1071 bitmap_set_bit (*dce_ssa_names,
1072 SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1075 static tree
1076 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1077 gassign *assign, enum tree_code code)
1079 machine_mode compute_mode = TYPE_MODE (compute_type);
1081 /* If the compute mode is not a vector mode (hence we are not decomposing
1082 a BLKmode vector to smaller, hardware-supported vectors), we may want
1083 to expand the operations in parallel. */
1084 if (!VECTOR_MODE_P (compute_mode))
1085 switch (code)
1087 case PLUS_EXPR:
1088 case MINUS_EXPR:
1089 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1090 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1091 gimple_assign_rhs1 (assign),
1092 gimple_assign_rhs2 (assign), code);
1093 break;
1095 case NEGATE_EXPR:
1096 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1097 return expand_vector_addition (gsi, do_unop, do_negate, type,
1098 gimple_assign_rhs1 (assign),
1099 NULL_TREE, code);
1100 break;
1102 case BIT_AND_EXPR:
1103 case BIT_IOR_EXPR:
1104 case BIT_XOR_EXPR:
1105 return expand_vector_parallel (gsi, do_binop, type,
1106 gimple_assign_rhs1 (assign),
1107 gimple_assign_rhs2 (assign), code);
1109 case BIT_NOT_EXPR:
1110 return expand_vector_parallel (gsi, do_unop, type,
1111 gimple_assign_rhs1 (assign),
1112 NULL_TREE, code);
1113 case EQ_EXPR:
1114 case NE_EXPR:
1115 case GT_EXPR:
1116 case LT_EXPR:
1117 case GE_EXPR:
1118 case LE_EXPR:
1119 case UNEQ_EXPR:
1120 case UNGT_EXPR:
1121 case UNLT_EXPR:
1122 case UNGE_EXPR:
1123 case UNLE_EXPR:
1124 case LTGT_EXPR:
1125 case ORDERED_EXPR:
1126 case UNORDERED_EXPR:
1128 tree rhs1 = gimple_assign_rhs1 (assign);
1129 tree rhs2 = gimple_assign_rhs2 (assign);
1131 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1134 case TRUNC_DIV_EXPR:
1135 case TRUNC_MOD_EXPR:
1137 tree rhs1 = gimple_assign_rhs1 (assign);
1138 tree rhs2 = gimple_assign_rhs2 (assign);
1139 tree ret;
1141 if (!optimize
1142 || !VECTOR_INTEGER_TYPE_P (type)
1143 || TREE_CODE (rhs2) != VECTOR_CST
1144 || !VECTOR_MODE_P (TYPE_MODE (type)))
1145 break;
1147 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1148 if (ret != NULL_TREE)
1149 return ret;
1150 break;
1153 default:
1154 break;
1157 if (TREE_CODE_CLASS (code) == tcc_unary)
1158 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1159 gimple_assign_rhs1 (assign),
1160 NULL_TREE, code);
1161 else
1162 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1163 gimple_assign_rhs1 (assign),
1164 gimple_assign_rhs2 (assign), code);
1167 /* Try to optimize
1168 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1169 style stmts into:
1170 _9 = { b_7, b_7, b_7, b_7 };
1171 a_5 = _9 + { 0, 3, 6, 9 };
1172 because a vector splat operation is usually more efficient
1173 than piecewise initialization of the vector. */
1175 static void
1176 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1178 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1179 tree lhs = gimple_assign_lhs (stmt);
1180 tree rhs = gimple_assign_rhs1 (stmt);
1181 tree type = TREE_TYPE (rhs);
1182 unsigned int i, j;
1183 unsigned HOST_WIDE_INT nelts;
1184 bool all_same = true;
1185 constructor_elt *elt;
1186 gimple *g;
1187 tree base = NULL_TREE;
1188 optab op;
1190 if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1191 || nelts <= 2
1192 || CONSTRUCTOR_NELTS (rhs) != nelts)
1193 return;
1194 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1195 if (op == unknown_optab
1196 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1197 return;
1198 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1199 if (TREE_CODE (elt->value) != SSA_NAME
1200 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1201 return;
1202 else
1204 tree this_base = elt->value;
1205 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1206 all_same = false;
1207 for (j = 0; j < nelts + 1; j++)
1209 g = SSA_NAME_DEF_STMT (this_base);
1210 if (is_gimple_assign (g)
1211 && gimple_assign_rhs_code (g) == PLUS_EXPR
1212 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1213 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1214 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1215 this_base = gimple_assign_rhs1 (g);
1216 else
1217 break;
1219 if (i == 0)
1220 base = this_base;
1221 else if (this_base != base)
1222 return;
1224 if (all_same)
1225 return;
1226 tree_vector_builder cst (type, nelts, 1);
1227 for (i = 0; i < nelts; i++)
1229 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1230 tree elt = build_zero_cst (TREE_TYPE (base));
1231 while (this_base != base)
1233 g = SSA_NAME_DEF_STMT (this_base);
1234 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1235 elt, gimple_assign_rhs2 (g));
1236 if (elt == NULL_TREE
1237 || TREE_CODE (elt) != INTEGER_CST
1238 || TREE_OVERFLOW (elt))
1239 return;
1240 this_base = gimple_assign_rhs1 (g);
1242 cst.quick_push (elt);
1244 for (i = 0; i < nelts; i++)
1245 CONSTRUCTOR_ELT (rhs, i)->value = base;
1246 g = gimple_build_assign (make_ssa_name (type), rhs);
1247 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1248 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1249 cst.build ());
1250 gsi_replace (gsi, g, false);
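/* Illustrative sketch, not part of the pass: GNU C source of the shape
   this optimization targets (the example_* names are made up).  The
   constructor { b, b + 3, b + 6, b + 9 } is rewritten into a splat of
   b added to the constant vector { 0, 3, 6, 9 }.  */
typedef int example_series_v4si __attribute__ ((vector_size (16)));
static example_series_v4si
example_series_from_base (int b)
{
  return (example_series_v4si) { b, b + 3, b + 6, b + 9 };
}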
1253 /* Return a type for the widest vector mode whose components are of type
1254 TYPE, or NULL_TREE if none is found. */
1256 static tree
1257 type_for_widest_vector_mode (tree type, optab op)
1259 machine_mode inner_mode = TYPE_MODE (type);
1260 machine_mode best_mode = VOIDmode, mode;
1261 poly_int64 best_nunits = 0;
1263 if (SCALAR_FLOAT_MODE_P (inner_mode))
1264 mode = MIN_MODE_VECTOR_FLOAT;
1265 else if (SCALAR_FRACT_MODE_P (inner_mode))
1266 mode = MIN_MODE_VECTOR_FRACT;
1267 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1268 mode = MIN_MODE_VECTOR_UFRACT;
1269 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1270 mode = MIN_MODE_VECTOR_ACCUM;
1271 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1272 mode = MIN_MODE_VECTOR_UACCUM;
1273 else if (inner_mode == BImode)
1274 mode = MIN_MODE_VECTOR_BOOL;
1275 else
1276 mode = MIN_MODE_VECTOR_INT;
1278 FOR_EACH_MODE_FROM (mode, mode)
1279 if (GET_MODE_INNER (mode) == inner_mode
1280 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1281 && optab_handler (op, mode) != CODE_FOR_nothing)
1282 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1284 if (best_mode == VOIDmode)
1285 return NULL_TREE;
1286 else
1287 return build_vector_type_for_mode (type, best_mode);
1291 /* Build a reference to the element of the vector VECT.  The function
1292 returns either the element itself, a BIT_FIELD_REF, or an
1293 ARRAY_REF expression.
1295 GSI is required to insert temporary variables while building a
1296 reference to the element of the vector VECT.
1298 PTMPVEC is a pointer to a temporary variable used for caching
1299 purposes.  If PTMPVEC is NULL, a new temporary variable
1300 will be created. */
1301 static tree
1302 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1304 tree vect_type, vect_elt_type;
1305 gimple *asgn;
1306 tree tmpvec;
1307 tree arraytype;
1308 bool need_asgn = true;
1309 unsigned int elements;
1311 vect_type = TREE_TYPE (vect);
1312 vect_elt_type = TREE_TYPE (vect_type);
1313 elements = nunits_for_known_piecewise_op (vect_type);
1315 if (TREE_CODE (idx) == INTEGER_CST)
1317 unsigned HOST_WIDE_INT index;
1319 /* Given that we're about to compute a binary modulus,
1320 we don't care about the high bits of the value. */
1321 index = TREE_INT_CST_LOW (idx);
1322 if (!tree_fits_uhwi_p (idx) || index >= elements)
1324 index &= elements - 1;
1325 idx = build_int_cst (TREE_TYPE (idx), index);
1328 /* When lowering a vector statement sequence do some easy
1329 simplification by looking through intermediate vector results. */
1330 if (TREE_CODE (vect) == SSA_NAME)
1332 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1333 if (is_gimple_assign (def_stmt)
1334 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1335 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1336 vect = gimple_assign_rhs1 (def_stmt);
1339 if (TREE_CODE (vect) == VECTOR_CST)
1340 return VECTOR_CST_ELT (vect, index);
1341 else if (TREE_CODE (vect) == CONSTRUCTOR
1342 && (CONSTRUCTOR_NELTS (vect) == 0
1343 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1344 != VECTOR_TYPE))
1346 if (index < CONSTRUCTOR_NELTS (vect))
1347 return CONSTRUCTOR_ELT (vect, index)->value;
1348 return build_zero_cst (vect_elt_type);
1350 else
1352 tree size = vector_element_bits_tree (vect_type);
1353 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1354 size);
1355 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1359 if (!ptmpvec)
1360 tmpvec = create_tmp_var (vect_type, "vectmp");
1361 else if (!*ptmpvec)
1362 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1363 else
1365 tmpvec = *ptmpvec;
1366 need_asgn = false;
1369 if (need_asgn)
1371 TREE_ADDRESSABLE (tmpvec) = 1;
1372 asgn = gimple_build_assign (tmpvec, vect);
1373 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1376 arraytype = build_array_type_nelts (vect_elt_type, elements);
1377 return build4 (ARRAY_REF, vect_elt_type,
1378 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1379 idx, NULL_TREE, NULL_TREE);
1382 /* Check if VEC_PERM_EXPR within the given setting is supported
1383 by hardware, or lower it piecewise.
1385 When VEC_PERM_EXPR has the same first and second operands:
1386 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1387 {v0[mask[0]], v0[mask[1]], ...}
1388 MASK and V0 must have the same number of elements.
1390 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1391 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1392 V0 and V1 must have the same type.  MASK, V0 and V1 must have the
1393 same number of elements. */
1395 static void
1396 lower_vec_perm (gimple_stmt_iterator *gsi)
1398 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1399 tree mask = gimple_assign_rhs3 (stmt);
1400 tree vec0 = gimple_assign_rhs1 (stmt);
1401 tree vec1 = gimple_assign_rhs2 (stmt);
1402 tree vect_type = TREE_TYPE (vec0);
1403 tree mask_type = TREE_TYPE (mask);
1404 tree vect_elt_type = TREE_TYPE (vect_type);
1405 tree mask_elt_type = TREE_TYPE (mask_type);
1406 unsigned HOST_WIDE_INT elements;
1407 vec<constructor_elt, va_gc> *v;
1408 tree constr, t, si, i_val;
1409 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1410 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1411 location_t loc = gimple_location (gsi_stmt (*gsi));
1412 unsigned i;
1414 if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1415 return;
1417 if (TREE_CODE (mask) == SSA_NAME)
1419 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1420 if (is_gimple_assign (def_stmt)
1421 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1422 mask = gimple_assign_rhs1 (def_stmt);
1425 vec_perm_builder sel_int;
1427 if (TREE_CODE (mask) == VECTOR_CST
1428 && tree_to_vec_perm_builder (&sel_int, mask))
1430 vec_perm_indices indices (sel_int, 2, elements);
1431 if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1433 gimple_assign_set_rhs3 (stmt, mask);
1434 update_stmt (stmt);
1435 return;
1437 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1438 vector as VEC1 and a right element shift MASK. */
1439 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1440 != CODE_FOR_nothing
1441 && TREE_CODE (vec1) == VECTOR_CST
1442 && initializer_zerop (vec1)
1443 && maybe_ne (indices[0], 0)
1444 && known_lt (poly_uint64 (indices[0]), elements))
1446 bool ok_p = indices.series_p (0, 1, indices[0], 1);
1447 if (!ok_p)
1449 for (i = 1; i < elements; ++i)
1451 poly_uint64 actual = indices[i];
1452 poly_uint64 expected = i + indices[0];
1453 /* Indices into the second vector are all equivalent. */
1454 if (maybe_lt (actual, elements)
1455 ? maybe_ne (actual, expected)
1456 : maybe_lt (expected, elements))
1457 break;
1459 ok_p = i == elements;
1461 if (ok_p)
1463 gimple_assign_set_rhs3 (stmt, mask);
1464 update_stmt (stmt);
1465 return;
1468 /* And similarly vec_shl pattern. */
1469 if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1470 != CODE_FOR_nothing
1471 && TREE_CODE (vec0) == VECTOR_CST
1472 && initializer_zerop (vec0))
1474 unsigned int first = 0;
1475 for (i = 0; i < elements; ++i)
1476 if (known_eq (poly_uint64 (indices[i]), elements))
1478 if (i == 0 || first)
1479 break;
1480 first = i;
1482 else if (first
1483 ? maybe_ne (poly_uint64 (indices[i]),
1484 elements + i - first)
1485 : maybe_ge (poly_uint64 (indices[i]), elements))
1486 break;
1487 if (i == elements)
1489 gimple_assign_set_rhs3 (stmt, mask);
1490 update_stmt (stmt);
1491 return;
1495 else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1496 return;
1498 warning_at (loc, OPT_Wvector_operation_performance,
1499 "vector shuffling operation will be expanded piecewise");
1501 vec_alloc (v, elements);
1502 for (i = 0; i < elements; i++)
1504 si = size_int (i);
1505 i_val = vector_element (gsi, mask, si, &masktmp);
1507 if (TREE_CODE (i_val) == INTEGER_CST)
1509 unsigned HOST_WIDE_INT index;
1511 index = TREE_INT_CST_LOW (i_val);
1512 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1513 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1515 if (two_operand_p && (index & elements) != 0)
1516 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1517 else
1518 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1520 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1521 true, GSI_SAME_STMT);
1523 else
1525 tree cond = NULL_TREE, v0_val;
1527 if (two_operand_p)
1529 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1530 build_int_cst (mask_elt_type, elements));
1531 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1532 true, GSI_SAME_STMT);
1535 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1536 build_int_cst (mask_elt_type, elements - 1));
1537 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1538 true, GSI_SAME_STMT);
1540 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1541 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1542 true, GSI_SAME_STMT);
1544 if (two_operand_p)
1546 tree v1_val;
1548 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1549 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1550 true, GSI_SAME_STMT);
1552 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1553 cond, build_zero_cst (mask_elt_type));
1554 cond = fold_build3 (COND_EXPR, vect_elt_type,
1555 cond, v0_val, v1_val);
1556 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1557 true, GSI_SAME_STMT);
1559 else
1560 t = v0_val;
1563 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1566 constr = build_constructor (vect_type, v);
1567 gimple_assign_set_rhs_from_tree (gsi, constr);
1568 update_stmt (gsi_stmt (*gsi));
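/* Illustrative sketch, not part of the pass: __builtin_shuffle is the
   usual GNU C way of producing a VEC_PERM_EXPR (the example_* names
   are made up).  With a constant mask the statement is left alone when
   can_vec_perm_const_p accepts it; otherwise the loop above extracts
   one element per lane.  */
typedef int example_perm_v4si __attribute__ ((vector_size (16)));
static example_perm_v4si
example_reverse (example_perm_v4si v)
{
  example_perm_v4si mask = { 3, 2, 1, 0 };
  return __builtin_shuffle (v, mask);	/* one-operand VEC_PERM_EXPR */
}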
1571 /* If OP is a uniform vector return the element it is a splat from. */
1573 static tree
1574 ssa_uniform_vector_p (tree op)
1576 if (TREE_CODE (op) == VECTOR_CST
1577 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1578 || TREE_CODE (op) == CONSTRUCTOR)
1579 return uniform_vector_p (op);
1580 if (TREE_CODE (op) == SSA_NAME)
1582 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1583 if (gimple_assign_single_p (def_stmt))
1584 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1586 return NULL_TREE;
1589 /* Return the type in which the operation CODE with optab OP can be
1590 computed. */
1592 static tree
1593 get_compute_type (enum tree_code code, optab op, tree type)
1595 /* For very wide vectors, try using a smaller vector mode. */
1596 tree compute_type = type;
1597 if (op
1598 && (!VECTOR_MODE_P (TYPE_MODE (type))
1599 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1601 tree vector_compute_type
1602 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1603 if (vector_compute_type != NULL_TREE
1604 && subparts_gt (compute_type, vector_compute_type)
1605 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1606 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1607 != CODE_FOR_nothing))
1608 compute_type = vector_compute_type;
1611 /* If we are breaking a BLKmode vector into smaller pieces,
1612 type_for_widest_vector_mode has already looked into the optab,
1613 so skip these checks. */
1614 if (compute_type == type)
1616 machine_mode compute_mode = TYPE_MODE (compute_type);
1617 if (VECTOR_MODE_P (compute_mode))
1619 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1620 return compute_type;
1621 if (code == MULT_HIGHPART_EXPR
1622 && can_mult_highpart_p (compute_mode,
1623 TYPE_UNSIGNED (compute_type)))
1624 return compute_type;
1626 /* There is no operation in hardware, so fall back to scalars. */
1627 compute_type = TREE_TYPE (type);
1630 return compute_type;
1633 static tree
1634 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1635 tree bitpos, tree bitsize, enum tree_code code,
1636 tree type ATTRIBUTE_UNUSED)
1638 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1639 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1640 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1641 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1642 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1643 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1646 /* Expand a vector COND_EXPR to scalars, piecewise. */
1647 static void
1648 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1650 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1651 tree type = gimple_expr_type (stmt);
1652 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1653 machine_mode compute_mode = TYPE_MODE (compute_type);
1654 gcc_assert (compute_mode != BLKmode);
1655 tree lhs = gimple_assign_lhs (stmt);
1656 tree rhs2 = gimple_assign_rhs2 (stmt);
1657 tree rhs3 = gimple_assign_rhs3 (stmt);
1658 tree new_rhs;
1660 /* If the compute mode is not a vector mode (hence we are not decomposing
1661 a BLKmode vector to smaller, hardware-supported vectors), we may want
1662 to expand the operations in parallel. */
1663 if (!VECTOR_MODE_P (compute_mode))
1664 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1665 COND_EXPR);
1666 else
1667 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1668 rhs2, rhs3, COND_EXPR);
1669 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1670 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1671 new_rhs);
1673 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1674 way to do it is change expand_vector_operation and its callees to
1675 return a tree_code, RHS1 and RHS2 instead of a tree. */
1676 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1677 update_stmt (gsi_stmt (*gsi));
1680 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1681 lowering. If INNER_TYPE is not a vector type, this is a scalar
1682 fallback. */
1684 static tree
1685 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1686 tree decl, tree bitpos, tree bitsize,
1687 enum tree_code code, tree type)
1689 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1690 if (!VECTOR_TYPE_P (inner_type))
1691 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1692 if (code == CALL_EXPR)
1694 gimple *g = gimple_build_call (decl, 1, a);
1695 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1696 gimple_call_set_lhs (g, lhs);
1697 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1698 return lhs;
1700 else
1702 tree outer_type = build_vector_type (TREE_TYPE (type),
1703 TYPE_VECTOR_SUBPARTS (inner_type));
1704 return gimplify_build1 (gsi, code, outer_type, a);
1708 /* Similarly, but for narrowing conversion. */
1710 static tree
1711 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1712 tree, tree bitpos, tree, enum tree_code code,
1713 tree type)
1715 tree itype = build_vector_type (TREE_TYPE (inner_type),
1716 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1717 2));
1718 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1719 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1720 int_const_binop (PLUS_EXPR, bitpos,
1721 TYPE_SIZE (itype)));
1722 tree outer_type = build_vector_type (TREE_TYPE (type),
1723 TYPE_VECTOR_SUBPARTS (inner_type));
1724 return gimplify_build2 (gsi, code, outer_type, b, c);
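/* Illustrative sketch, not part of the pass: __builtin_convertvector
   is one GNU C source form that can reach this lowering as a
   .VEC_CONVERT internal call (the example_* names are made up).  Here
   a float vector is converted lane by lane to int, a FIX_TRUNC_EXPR
   style conversion.  */
typedef float example_conv_v4sf __attribute__ ((vector_size (16)));
typedef int example_conv_v4si __attribute__ ((vector_size (16)));
static example_conv_v4si
example_convert_to_int (example_conv_v4sf v)
{
  return __builtin_convertvector (v, example_conv_v4si);
}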
1727 /* Expand VEC_CONVERT ifn call. */
1729 static void
1730 expand_vector_conversion (gimple_stmt_iterator *gsi)
1732 gimple *stmt = gsi_stmt (*gsi);
1733 gimple *g;
1734 tree lhs = gimple_call_lhs (stmt);
1735 tree arg = gimple_call_arg (stmt, 0);
1736 tree ret_type = TREE_TYPE (lhs);
1737 tree arg_type = TREE_TYPE (arg);
1738 tree new_rhs, compute_type = TREE_TYPE (arg_type);
1739 enum tree_code code = NOP_EXPR;
1740 enum tree_code code1 = ERROR_MARK;
1741 enum { NARROW, NONE, WIDEN } modifier = NONE;
1742 optab optab1 = unknown_optab;
1744 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1745 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1746 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1747 code = FIX_TRUNC_EXPR;
1748 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1749 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1750 code = FLOAT_EXPR;
1751 unsigned int ret_elt_bits = vector_element_bits (ret_type);
1752 unsigned int arg_elt_bits = vector_element_bits (arg_type);
1753 if (ret_elt_bits < arg_elt_bits)
1754 modifier = NARROW;
1755 else if (ret_elt_bits > arg_elt_bits)
1756 modifier = WIDEN;
1758 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1760 if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1762 g = gimple_build_assign (lhs, code1, arg);
1763 gsi_replace (gsi, g, false);
1764 return;
1766 /* Can't use get_compute_type here, as supportable_convert_operation
1767 doesn't necessarily use an optab and needs two arguments. */
1768 tree vec_compute_type
1769 = type_for_widest_vector_mode (TREE_TYPE (arg_type), mov_optab);
1770 if (vec_compute_type
1771 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type))
1772 && subparts_gt (arg_type, vec_compute_type))
1774 unsigned HOST_WIDE_INT nelts
1775 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1776 while (nelts > 1)
1778 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1779 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1780 if (supportable_convert_operation (code, ret1_type, arg1_type,
1781 &code1))
1783 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1784 ret_type, arg1_type, arg,
1785 NULL_TREE, code1);
1786 g = gimple_build_assign (lhs, new_rhs);
1787 gsi_replace (gsi, g, false);
1788 return;
1790 nelts = nelts / 2;
1794 else if (modifier == NARROW)
1796 switch (code)
1798 CASE_CONVERT:
1799 code1 = VEC_PACK_TRUNC_EXPR;
1800 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1801 break;
1802 case FIX_TRUNC_EXPR:
1803 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1804 /* The signedness is determined from output operand. */
1805 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1806 break;
1807 case FLOAT_EXPR:
1808 code1 = VEC_PACK_FLOAT_EXPR;
1809 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1810 break;
1811 default:
1812 gcc_unreachable ();
1815 if (optab1)
1816 compute_type = get_compute_type (code1, optab1, arg_type);
1817 enum insn_code icode1;
1818 if (VECTOR_TYPE_P (compute_type)
1819 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1820 != CODE_FOR_nothing)
1821 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1823 tree cretd_type
1824 = build_vector_type (TREE_TYPE (ret_type),
1825 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1826 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1828 if (compute_type == arg_type)
1830 new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1831 arg, build_zero_cst (arg_type));
1832 new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1833 TYPE_SIZE (ret_type),
1834 bitsize_int (0));
1835 g = gimple_build_assign (lhs, new_rhs);
1836 gsi_replace (gsi, g, false);
1837 return;
1839 tree dcompute_type
1840 = build_vector_type (TREE_TYPE (compute_type),
1841 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1842 if (TYPE_MAIN_VARIANT (dcompute_type)
1843 == TYPE_MAIN_VARIANT (arg_type))
1844 new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1845 NULL_TREE, bitsize_int (0),
1846 NULL_TREE, code1,
1847 ret_type);
1848 else
1849 new_rhs = expand_vector_piecewise (gsi,
1850 do_vec_narrow_conversion,
1851 arg_type, dcompute_type,
1852 arg, NULL_TREE, code1,
1853 ret_type);
1854 g = gimple_build_assign (lhs, new_rhs);
1855 gsi_replace (gsi, g, false);
1856 return;
1860 else if (modifier == WIDEN)
1862 enum tree_code code2 = ERROR_MARK;
1863 optab optab2 = unknown_optab;
1864 switch (code)
1866 CASE_CONVERT:
1867 code1 = VEC_UNPACK_LO_EXPR;
1868 code2 = VEC_UNPACK_HI_EXPR;
1869 break;
1870 case FIX_TRUNC_EXPR:
1871 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1872 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1873 break;
1874 case FLOAT_EXPR:
1875 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1876 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1877 break;
1878 default:
1879 gcc_unreachable ();
1881 if (BYTES_BIG_ENDIAN)
1882 std::swap (code1, code2);
1884 if (code == FIX_TRUNC_EXPR)
1886 /* The signedness is determined from the output operand. */
1887 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1888 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
1890 else
1892 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1893 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
1896 if (optab1 && optab2)
1897 compute_type = get_compute_type (code1, optab1, arg_type);
1899 enum insn_code icode1, icode2;
1900 if (VECTOR_TYPE_P (compute_type)
1901 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1902 != CODE_FOR_nothing)
1903 && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
1904 != CODE_FOR_nothing)
1905 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
1906 && (insn_data[icode1].operand[0].mode
1907 == insn_data[icode2].operand[0].mode))
1909 poly_uint64 nunits
1910 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
1911 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
1912 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1914 vec<constructor_elt, va_gc> *v;
1915 tree part_width = TYPE_SIZE (compute_type);
1916 tree index = bitsize_int (0);
1917 int nunits = nunits_for_known_piecewise_op (arg_type);
1918 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
1919 int i;
1920 location_t loc = gimple_location (gsi_stmt (*gsi));
1922 if (compute_type != arg_type)
1923 warning_at (loc, OPT_Wvector_operation_performance,
1924 "vector operation will be expanded piecewise");
1925 else
1927 nunits = 1;
1928 delta = 1;
1931 vec_alloc (v, (nunits + delta - 1) / delta * 2);
1932 for (i = 0; i < nunits;
1933 i += delta, index = int_const_binop (PLUS_EXPR, index,
1934 part_width))
1936 tree a = arg;
1937 if (compute_type != arg_type)
1938 a = tree_vec_extract (gsi, compute_type, a, part_width,
1939 index);
1940 tree result = gimplify_build1 (gsi, code1, cretd_type, a);
1941 constructor_elt ce = { NULL_TREE, result };
1942 v->quick_push (ce);
1943 ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
1944 v->quick_push (ce);
1947 new_rhs = build_constructor (ret_type, v);
1948 g = gimple_build_assign (lhs, new_rhs);
1949 gsi_replace (gsi, g, false);
1950 return;
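/* Editor's sketch of the widening branch above (hypothetical element counts
   and SSA names): converting an 8 x short ARG to an 8 x int result when the
   target can only unpack one 8 x short into 4 x int halves builds a
   CONSTRUCTOR of the lo/hi unpack results, roughly:

     lo_4 = [vec_unpack_lo_expr] arg;     // 4 x int
     hi_5 = [vec_unpack_hi_expr] arg;     // 4 x int
     lhs  = {lo_4, hi_5};                 // 8 x int CONSTRUCTOR

   with the lo/hi pair swapped on big-endian targets, as done above.  */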
1955 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
1956 TREE_TYPE (arg_type), arg,
1957 NULL_TREE, code, ret_type);
1958 g = gimple_build_assign (lhs, new_rhs);
1959 gsi_replace (gsi, g, false);
1962 /* Process one statement. If we identify a vector operation, expand it. */
1964 static void
1965 expand_vector_operations_1 (gimple_stmt_iterator *gsi,
1966 auto_bitmap *dce_ssa_names)
1968 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1969 enum tree_code code;
1970 optab op = unknown_optab;
1971 enum gimple_rhs_class rhs_class;
1972 tree new_rhs;
1974 /* Only consider GIMPLE_ASSIGN statements; IFN_VEC_CONVERT calls are expanded here too. */
1975 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1976 if (!stmt)
1978 if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
1979 expand_vector_conversion (gsi);
1980 return;
1983 code = gimple_assign_rhs_code (stmt);
1984 rhs_class = get_gimple_rhs_class (code);
1985 lhs = gimple_assign_lhs (stmt);
1987 if (code == VEC_PERM_EXPR)
1989 lower_vec_perm (gsi);
1990 return;
1993 if (code == VEC_COND_EXPR)
1995 expand_vector_condition (gsi, dce_ssa_names);
1996 return;
1999 if (code == COND_EXPR
2000 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2001 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2003 expand_vector_scalar_condition (gsi);
2004 return;
2007 if (code == CONSTRUCTOR
2008 && TREE_CODE (lhs) == SSA_NAME
2009 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2010 && !gimple_clobber_p (stmt)
2011 && optimize)
2013 optimize_vector_constructor (gsi);
2014 return;
2017 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2018 return;
2020 rhs1 = gimple_assign_rhs1 (stmt);
2021 type = gimple_expr_type (stmt);
2022 if (rhs_class == GIMPLE_BINARY_RHS)
2023 rhs2 = gimple_assign_rhs2 (stmt);
2025 if (!VECTOR_TYPE_P (type)
2026 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2027 return;
2029 /* A scalar operation pretending to be a vector one. */
2030 if (VECTOR_BOOLEAN_TYPE_P (type)
2031 && !VECTOR_MODE_P (TYPE_MODE (type))
2032 && TYPE_MODE (type) != BLKmode
2033 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2034 || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2035 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2036 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2037 return;
2039 /* If the vector operation operates on elements that are all the same,
2040 implement it with a scalar operation and a splat, provided the target
2041 supports the scalar operation. */
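/* Editor's sketch of this transformation (the SSA names are hypothetical):

     _5 = {x_1, x_1, x_1, x_1} + {y_2, y_2, y_2, y_2};

   becomes, when the scalar addition is supported,

     s_6 = x_1 + y_2;
     _5 = {s_6, s_6, s_6, s_6};

   i.e. one scalar operation followed by a splat of its result.  */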
2042 tree srhs1, srhs2 = NULL_TREE;
2043 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2044 && (rhs2 == NULL_TREE
2045 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2046 && (srhs2 = rhs2))
2047 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2048 /* As we query direct optabs, restrict to non-conversion operations. */
2049 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2051 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2052 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2053 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2055 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
2056 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
2057 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
2058 gimple_assign_set_rhs_from_tree (gsi,
2059 build_vector_from_val (type, slhs));
2060 update_stmt (stmt);
2061 return;
2065 if (CONVERT_EXPR_CODE_P (code)
2066 || code == FLOAT_EXPR
2067 || code == FIX_TRUNC_EXPR
2068 || code == VIEW_CONVERT_EXPR)
2069 return;
2071 /* The signedness is determined from the input argument. */
2072 if (code == VEC_UNPACK_FLOAT_HI_EXPR
2073 || code == VEC_UNPACK_FLOAT_LO_EXPR
2074 || code == VEC_PACK_FLOAT_EXPR)
2076 /* We do not know how to scalarize those. */
2077 return;
2080 /* For widening/narrowing vector operations, the relevant type is that of
2081 the arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
2082 handled in the same way as the cases above. */
2083 if (code == WIDEN_SUM_EXPR
2084 || code == VEC_WIDEN_MULT_HI_EXPR
2085 || code == VEC_WIDEN_MULT_LO_EXPR
2086 || code == VEC_WIDEN_MULT_EVEN_EXPR
2087 || code == VEC_WIDEN_MULT_ODD_EXPR
2088 || code == VEC_UNPACK_HI_EXPR
2089 || code == VEC_UNPACK_LO_EXPR
2090 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2091 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2092 || code == VEC_PACK_TRUNC_EXPR
2093 || code == VEC_PACK_SAT_EXPR
2094 || code == VEC_PACK_FIX_TRUNC_EXPR
2095 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2096 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2098 /* We do not know how to scalarize those. */
2099 return;
2102 /* Choose between vector shift/rotate by vector and vector shift/rotate by
2103 scalar. */
2104 if (code == LSHIFT_EXPR
2105 || code == RSHIFT_EXPR
2106 || code == LROTATE_EXPR
2107 || code == RROTATE_EXPR)
2109 optab opv;
2111 /* Check whether we have vector <op> {x,x,x,x} where x
2112 could be a scalar variable or a constant. Transform
2113 vector <op> {x,x,x,x} ==> vector <op> scalar. */
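/* Editor's sketch (hypothetical SSA names): a uniform shift count such as

     _3 = _1 << { 5, 5, 5, 5 };

   is rewritten as

     _3 = _1 << 5;

   so that the vector-by-scalar shift optab can be considered below.  */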
2114 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2116 tree first;
2118 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2120 gimple_assign_set_rhs2 (stmt, first);
2121 update_stmt (stmt);
2122 rhs2 = first;
2126 opv = optab_for_tree_code (code, type, optab_vector);
2127 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2128 op = opv;
2129 else
2131 op = optab_for_tree_code (code, type, optab_scalar);
2133 compute_type = get_compute_type (code, op, type);
2134 if (compute_type == type)
2135 return;
2136 /* The rtl expander will expand vector/scalar as vector/vector
2137 if necessary. Pick the one with the wider vector type. */
2138 tree compute_vtype = get_compute_type (code, opv, type);
2139 if (subparts_gt (compute_vtype, compute_type))
2141 compute_type = compute_vtype;
2142 op = opv;
2146 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2148 if (compute_type == NULL_TREE)
2149 compute_type = get_compute_type (code, op, type);
2150 if (compute_type == type)
2151 return;
2152 /* Before splitting vector rotates into scalar rotates,
2153 see if we can use vector shifts and BIT_IOR_EXPR
2154 instead. For vector-by-vector rotates we'd also
2155 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
2156 for now, as fold doesn't seem to create such rotates anyway. */
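/* Editor's note: the identity being relied on is the usual shift/or
   expansion of a rotate.  An illustrative GNU C sketch (the typedef and
   function are purely illustrative; it assumes 32-bit elements and a
   rotate count 0 < b < 32):

     typedef unsigned int v4si __attribute__ ((vector_size (16)));

     v4si rotl (v4si x, unsigned int b)
     {
       return (x << b) | (x >> (32 - b));
     }

   which is why support for LSHIFT_EXPR, RSHIFT_EXPR and BIT_IOR_EXPR is
   checked below before falling back to scalar rotates.  */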
2157 if (compute_type == TREE_TYPE (type)
2158 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2160 optab oplv = vashl_optab, opl = ashl_optab;
2161 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2162 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2163 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2164 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2165 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2166 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2167 /* The rtl expander will expand vector/scalar as vector/vector
2168 if necessary. Pick the one with the wider vector type. */
2169 if (subparts_gt (compute_lvtype, compute_ltype))
2171 compute_ltype = compute_lvtype;
2172 opl = oplv;
2174 if (subparts_gt (compute_rvtype, compute_rtype))
2176 compute_rtype = compute_rvtype;
2177 opr = oprv;
2179 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2180 BIT_IOR_EXPR. */
2181 compute_type = compute_ltype;
2182 if (subparts_gt (compute_type, compute_rtype))
2183 compute_type = compute_rtype;
2184 if (subparts_gt (compute_type, compute_otype))
2185 compute_type = compute_otype;
2186 /* Verify all 3 operations can be performed in that type. */
2187 if (compute_type != TREE_TYPE (type))
2189 if (optab_handler (opl, TYPE_MODE (compute_type))
2190 == CODE_FOR_nothing
2191 || optab_handler (opr, TYPE_MODE (compute_type))
2192 == CODE_FOR_nothing
2193 || optab_handler (opo, TYPE_MODE (compute_type))
2194 == CODE_FOR_nothing)
2195 compute_type = TREE_TYPE (type);
2200 else
2201 op = optab_for_tree_code (code, type, optab_default);
2203 /* Optabs will try converting a negation into a subtraction, so
2204 look for it as well. TODO: negation of floating-point vectors
2205 might be turned into an exclusive OR toggling the sign bit. */
2206 if (op == unknown_optab
2207 && code == NEGATE_EXPR
2208 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2209 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
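/* Editor's sketch (hypothetical SSA names): with the subtraction optab
   found above, a vector negation can effectively be expanded as a
   subtraction from zero,

     _2 = -_1;    becoming    _2 = { 0, 0, 0, 0 } - _1;

   which is why MINUS_EXPR is queried for the integral element types
   checked above.  */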
2211 if (compute_type == NULL_TREE)
2212 compute_type = get_compute_type (code, op, type);
2213 if (compute_type == type)
2214 return;
2216 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
2218 /* Leave expression untouched for later expansion. */
2219 if (new_rhs == NULL_TREE)
2220 return;
2222 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2223 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2224 new_rhs);
2226 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2227 way to do that is to change expand_vector_operation and its callees to
2228 return a tree_code, RHS1 and RHS2 instead of a tree. */
2229 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2230 update_stmt (gsi_stmt (*gsi));
2233 /* Use this to lower vector operations introduced by the vectorizer,
2234 if it may need the bit-twiddling tricks implemented in this file. */
2236 static unsigned int
2237 expand_vector_operations (void)
2239 gimple_stmt_iterator gsi;
2240 basic_block bb;
2241 bool cfg_changed = false;
2243 auto_bitmap dce_ssa_names;
2245 FOR_EACH_BB_FN (bb, cfun)
2247 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2249 expand_vector_operations_1 (&gsi, &dce_ssa_names);
2250 /* ??? If we do not clean up EH then we will ICE during
2251 verification. But in reality we have created wrong code,
2252 as we did not properly transition EH info and edges to
2253 the piecewise computations. */
2254 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2255 && gimple_purge_dead_eh_edges (bb))
2256 cfg_changed = true;
2260 simple_dce_from_worklist (dce_ssa_names);
2262 return cfg_changed ? TODO_cleanup_cfg : 0;
2265 namespace {
2267 const pass_data pass_data_lower_vector =
2269 GIMPLE_PASS, /* type */
2270 "veclower", /* name */
2271 OPTGROUP_VEC, /* optinfo_flags */
2272 TV_NONE, /* tv_id */
2273 PROP_cfg, /* properties_required */
2274 PROP_gimple_lvec, /* properties_provided */
2275 0, /* properties_destroyed */
2276 0, /* todo_flags_start */
2277 TODO_update_ssa, /* todo_flags_finish */
2280 class pass_lower_vector : public gimple_opt_pass
2282 public:
2283 pass_lower_vector (gcc::context *ctxt)
2284 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2287 /* opt_pass methods: */
2288 virtual bool gate (function *fun)
2290 return !(fun->curr_properties & PROP_gimple_lvec);
2293 virtual unsigned int execute (function *)
2295 return expand_vector_operations ();
2298 }; // class pass_lower_vector
2300 } // anon namespace
2302 gimple_opt_pass *
2303 make_pass_lower_vector (gcc::context *ctxt)
2305 return new pass_lower_vector (ctxt);
2308 namespace {
2310 const pass_data pass_data_lower_vector_ssa =
2312 GIMPLE_PASS, /* type */
2313 "veclower2", /* name */
2314 OPTGROUP_VEC, /* optinfo_flags */
2315 TV_NONE, /* tv_id */
2316 PROP_cfg, /* properties_required */
2317 PROP_gimple_lvec, /* properties_provided */
2318 0, /* properties_destroyed */
2319 0, /* todo_flags_start */
2320 ( TODO_update_ssa
2321 | TODO_cleanup_cfg ), /* todo_flags_finish */
2324 class pass_lower_vector_ssa : public gimple_opt_pass
2326 public:
2327 pass_lower_vector_ssa (gcc::context *ctxt)
2328 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2331 /* opt_pass methods: */
2332 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2333 virtual unsigned int execute (function *)
2335 return expand_vector_operations ();
2338 }; // class pass_lower_vector_ssa
2340 } // anon namespace
2342 gimple_opt_pass *
2343 make_pass_lower_vector_ssa (gcc::context *ctxt)
2345 return new pass_lower_vector_ssa (ctxt);
2348 #include "gt-tree-vect-generic.h"