Merge branches/gcc-4_9-branch rev 225109.
[official-gcc.git] / gcc-4_9-branch / gcc / tree-vect-generic.c
blobf756156956fe8ddd68af10f6c7ef92b3eb8ca503
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "stor-layout.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "basic-block.h"
28 #include "tree-ssa-alias.h"
29 #include "internal-fn.h"
30 #include "tree-eh.h"
31 #include "gimple-expr.h"
32 #include "is-a.h"
33 #include "gimple.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "gimple-ssa.h"
37 #include "tree-cfg.h"
38 #include "stringpool.h"
39 #include "tree-ssanames.h"
40 #include "tree-iterator.h"
41 #include "tree-pass.h"
42 #include "flags.h"
43 #include "diagnostic.h"
44 #include "target.h"
46 /* Need to include rtl.h, expr.h, etc. for optabs. */
47 #include "expr.h"
48 #include "optabs.h"
51 static void expand_vector_operations_1 (gimple_stmt_iterator *);
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.

   Used by the word-at-a-time expanders below to build masks such as
   0x7f7f7f7f from a per-element pattern.  Requires WIDTH to divide
   HOST_BITS_PER_WIDE_INT and TYPE's precision to be exactly one or
   two HOST_WIDE_INTs wide (gcc_unreachable otherwise).  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  /* At least one element must fit in a HOST_WIDE_INT.  */
  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      /* ~0 / mask yields 0x...010101; multiplying by the masked value
	 replicates it into every WIDTH-bit slot.  */
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
/* One-entry cache for build_word_mode_vector_type: the scalar type for
   word_mode, plus the most recently built vector type and its unit
   count.  GTY-marked so the GC does not collect the cached trees.  */
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector types made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      /* Cache hit: reuse the previously built type.  */
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}
/* Signature of the per-element expansion callbacks (do_unop, do_binop,
   do_compare, do_plus_minus, do_negate): GSI, computation type, the two
   operands, bit position, bit size, and the tree code to expand.  */
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

/* Extract the piece of vector T at BITPOS of width BITSIZE as a value
   of TYPE.  A NULL BITPOS means "the whole of T", which is then just
   reinterpreted via VIEW_CONVERT_EXPR.  */
static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}
126 static tree
127 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
128 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
129 enum tree_code code)
131 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
132 return gimplify_build1 (gsi, code, inner_type, a);
135 static tree
136 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
137 tree bitpos, tree bitsize, enum tree_code code)
139 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
140 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
141 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
142 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
143 return gimplify_build2 (gsi, code, inner_type, a, b);
/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements

   returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  The -1/0 encoding matches
   the all-ones/all-zeros element values a vector comparison would
   produce.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  /* Signed (unsignedp == 0) integer type of the element's bit size.  */
  comp_type = build_nonstandard_integer_type
		      (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
            (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code)
{
  /* Note: BITPOS/BITSIZE are in fact used below; ATTRIBUTE_UNUSED only
     means "possibly unused" so this is harmless.  */
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  /* Masks: LOW_BITS has every bit but each element's sign bit set,
     HIGH_BITS has only the per-element sign bits set.  */
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      /* Subtraction: bias A's sign bits up instead of masking them off.  */
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
214 static tree
215 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
216 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
217 tree bitsize ATTRIBUTE_UNUSED,
218 enum tree_code code ATTRIBUTE_UNUSED)
220 tree inner_type = TREE_TYPE (TREE_TYPE (b));
221 HOST_WIDE_INT max;
222 tree low_bits, high_bits, b_low, result_low, signs;
224 max = GET_MODE_MASK (TYPE_MODE (inner_type));
225 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
226 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
228 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
230 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
231 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
232 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
233 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
234 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  F is called once per
   element (or per DELTA elements when INNER_TYPE is wider than the
   vector's element type) and the results are collected into a
   CONSTRUCTOR of TYPE.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  /* Number of original vector elements covered by one INNER_TYPE piece.  */
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* Same type as the statement means a true piecewise expansion;
     otherwise we are being used as the word-parallel fallback.  */
  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      /* Reinterpret the vector as N_WORDS word-mode elements and expand
	 piecewise over those.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    /* Worth it: word-parallel bit-twiddling expansion.  */
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    /* Fall back to one scalar operation per element.  */
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  Returns the piecewise expansion if the target
   cannot do it natively, or NULL_TREE (meaning: leave the statement
   alone) when it can.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}
/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.

   Prefers a scalar (uniform) shift when all counts are equal, then a
   vector-by-vector shift; a uniform shift by 0 returns OP0 unchanged.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, vec));
    }

  return NULL_TREE;
}
/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.

   OP0 CODE OP1 with CODE TRUNC_DIV_EXPR or TRUNC_MOD_EXPR and OP1 a
   VECTOR_CST.  Returns the replacement expression, or NULL_TREE if no
   cheaper sequence was found.

   MODE encodes the strategy chosen so far across elements:
     -1  not yet decided
     -2  give up on the multiplicative path (power-of-two path may
	 still succeed)
      0  unsigned, multiplier fits (optional pre-shift)
      1  unsigned, multiplier needs the add-and-halve fixup
    2/3  signed, small multiplier, divisor positive/negative
    4/5  signed, large multiplier, divisor positive/negative
   Mixing strategies between elements degrades MODE to -2.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (unsignedp)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = floor_log2 (d & -d);
	      else if (pre_shift == -1)
		{
		  /* Without a vector shift, all elements must share one
		     pre-shift: use the smallest power of two dividing
		     every element.  */
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      /* Large multiplier: sign-extend it into the full word.  */
	      this_mode = 4 + (d < 0);
	      ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (!unsignedp)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type))
	    {
	      tree zero, cst, cond;
	      gimple stmt;

	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					((unsigned HOST_WIDE_INT) 1
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type, NULL);
	      stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
						   cond, cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (unsignedp)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    ((unsigned HOST_WIDE_INT) 1
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (unsignedp)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  /* Power-of-two path failed or did not apply: fall through to the
     highpart-multiply strategy selected by MODE.  */
  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (unsignedp);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (unsignedp);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (!unsignedp);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  The VEC_COND_EXPR statement at *GSI is
   rewritten in place into a CONSTRUCTOR of per-element COND_EXPRs,
   unless the target can expand the vector condition natively.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* The condition may be an embedded comparison rather than an SSA
     value; then compare extracted elements of its two operands.  */
  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
	  aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
/* Expand the vector operation CODE of statement ASSIGN, whose result
   has vector type TYPE, using COMPUTE_TYPE pieces.  Dispatches to the
   specialized expanders (word-parallel addition, comparison, division
   by constant) when COMPUTE_TYPE is not itself a hardware vector mode;
   otherwise falls back to plain piecewise expansion.  Returns the
   replacement expression.  */
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* Bit-twiddling tricks are only valid when overflow wraps.  */
	if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise ops are always safe a word at a time.  */
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  /* Only worthwhile when optimizing, for integer vectors with a
	     constant divisor and a real vector mode.  */
	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}
998 /* Try to optimize
999 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1000 style stmts into:
1001 _9 = { b_7, b_7, b_7, b_7 };
1002 a_5 = _9 + { 0, 3, 6, 9 };
1003 because vector splat operation is usually more efficient
1004 than piecewise initialization of the vector. */
1006 static void
1007 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1009 gimple stmt = gsi_stmt (*gsi);
1010 tree lhs = gimple_assign_lhs (stmt);
1011 tree rhs = gimple_assign_rhs1 (stmt);
1012 tree type = TREE_TYPE (rhs);
1013 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1014 bool all_same = true;
1015 constructor_elt *elt;
1016 tree *cst;
1017 gimple g;
1018 tree base = NULL_TREE;
1019 optab op;
1021 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1022 return;
1023 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1024 if (op == unknown_optab
1025 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1026 return;
1027 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1028 if (TREE_CODE (elt->value) != SSA_NAME
1029 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1030 return;
1031 else
1033 tree this_base = elt->value;
1034 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1035 all_same = false;
1036 for (j = 0; j < nelts + 1; j++)
1038 g = SSA_NAME_DEF_STMT (this_base);
1039 if (is_gimple_assign (g)
1040 && gimple_assign_rhs_code (g) == PLUS_EXPR
1041 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1042 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1043 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1044 this_base = gimple_assign_rhs1 (g);
1045 else
1046 break;
1048 if (i == 0)
1049 base = this_base;
1050 else if (this_base != base)
1051 return;
1053 if (all_same)
1054 return;
1055 cst = XALLOCAVEC (tree, nelts);
1056 for (i = 0; i < nelts; i++)
1058 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;;
1059 cst[i] = build_zero_cst (TREE_TYPE (base));
1060 while (this_base != base)
1062 g = SSA_NAME_DEF_STMT (this_base);
1063 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1064 cst[i], gimple_assign_rhs2 (g));
1065 if (cst[i] == NULL_TREE
1066 || TREE_CODE (cst[i]) != INTEGER_CST
1067 || TREE_OVERFLOW (cst[i]))
1068 return;
1069 this_base = gimple_assign_rhs1 (g);
1072 for (i = 0; i < nelts; i++)
1073 CONSTRUCTOR_ELT (rhs, i)->value = base;
1074 g = gimple_build_assign (make_ssa_name (type, NULL), rhs);
1075 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1076 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, gimple_assign_lhs (g),
1077 build_vector (type, cst));
1078 gsi_replace (gsi, g, false);
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.

   OP is the optab the caller intends to use; a candidate mode is only
   accepted when the target implements OP for it.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  enum machine_mode inner_mode = TYPE_MODE (type);
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  /* Pick the vector-mode class matching the scalar class of TYPE,
     starting at the smallest mode in that class.  */
  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  /* Walk all wider modes in the class, keeping the one with the most
     elements whose component mode matches and which the target's OP
     handler supports.  */
  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   refernece to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  /* Constant index: we can resolve the element at compile time.  */
  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  /* Out-of-range indices wrap modulo the element count
	     (ELEMENTS is a power of two, so the mask is exact).  */
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  /* Scalar-element constructor: pull the element directly;
	     trailing elements beyond its explicit entries are zero.  */
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  /* Otherwise extract the bits at a constant position.  */
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  /* Variable index: spill the vector to an addressable temporary and
     index it as an array.  Reuse the cached temporary if the caller
     already provided one through PTMPVEC.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* Look through a trivial copy to find a constant mask.  */
  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      /* Each selector entry is reduced modulo 2*ELEMENTS: values with
	 the ELEMENTS bit set pick from the second operand.  */
      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      /* If the target supports this exact constant permutation, keep
	 the statement (with the possibly simplified mask) as-is.  */
      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    /* Variable mask, but the target has a variable-permute insn.  */
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  /* Build a CONSTRUCTOR selecting each result element individually.  */
  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  /* The ELEMENTS bit of the selector chooses the operand.  */
          if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  /* Non-constant selector: compute the operand choice and the
	     in-operand index at run time.  */
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			          build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      /* Select v0_val when the operand bit was clear.  */
	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
/* Return type in which CODE operation with optab OP can be
   computed.

   Returns TYPE itself when the target can do OP on it directly, a
   narrower hardware vector type when one exists that supports OP, or
   the scalar element type of TYPE when the operation must be expanded
   piecewise.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      /* Only accept a strictly narrower vector type that the target
	 actually supports for OP.  */
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      enum machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  /* MULT_HIGHPART_EXPR has no single optab entry; query the
	     dedicated expander predicate instead.  */
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}
1383 /* Helper function of expand_vector_operations_1. Return number of
1384 vector elements for vector types or 1 for other types. */
1386 static inline int
1387 count_type_subparts (tree type)
1389 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
/* Expand one scalar piece of a vector COND_EXPR: extract the elements
   of A and B at BITPOS/BITSIZE (when they are vectors) and build
   CODE (typically COND_EXPR) over them, reusing the condition operand
   of the statement at *GSI.  Callback for expand_vector_piecewise /
   expand_vector_parallel.  */
static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  /* The (scalar) condition is rhs1 of the COND_EXPR being lowered.  */
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, cond, a, b);
}
1404 /* Expand a vector COND_EXPR to scalars, piecewise. */
1405 static void
1406 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1408 gimple stmt = gsi_stmt (*gsi);
1409 tree type = gimple_expr_type (stmt);
1410 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1411 machine_mode compute_mode = TYPE_MODE (compute_type);
1412 gcc_assert (compute_mode != BLKmode);
1413 tree lhs = gimple_assign_lhs (stmt);
1414 tree rhs2 = gimple_assign_rhs2 (stmt);
1415 tree rhs3 = gimple_assign_rhs3 (stmt);
1416 tree new_rhs;
1418 /* If the compute mode is not a vector mode (hence we are not decomposing
1419 a BLKmode vector to smaller, hardware-supported vectors), we may want
1420 to expand the operations in parallel. */
1421 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1422 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1423 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1424 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1425 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1426 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1427 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1428 COND_EXPR);
1429 else
1430 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1431 rhs2, rhs3, COND_EXPR);
1432 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1433 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1434 new_rhs);
1436 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1437 way to do it is change expand_vector_operation and its callees to
1438 return a tree_code, RHS1 and RHS2 instead of a tree. */
1439 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1440 update_stmt (gsi_stmt (*gsi));
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  /* Dispatch the statement kinds that have dedicated lowerers.  */
  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  /* Scalar-condition COND_EXPR on a BLKmode vector result.  */
  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  /* Try turning a piecewise constructor into splat + constant add.  */
  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  /* Conversions are handled elsewhere; nothing to lower here.  */
  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;
	  gimple def_stmt;

	  if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  /* COMPUTE_TYPE == TYPE means the target handles the whole vector
     at once; leave the statement alone.  */
  if (compute_type == type)
    return;

  gcc_assert (code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree. */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
1677 /* Use this to lower vector operations introduced by the vectorizer,
1678 if it may need the bit-twiddling tricks implemented in this file. */
1680 static bool
1681 gate_expand_vector_operations_ssa (void)
1683 return !(cfun->curr_properties & PROP_gimple_lvec);
/* Main entry point of the pass: walk every statement of every basic
   block and lower the vector operations we cannot leave to the
   expander.  Returns TODO_cleanup_cfg when EH edge purging changed
   the CFG, 0 otherwise.  */
static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ??? If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
namespace {

/* Pass descriptor for the gated "veclower" pass, run before
   vectorization-related lowering is recorded via PROP_gimple_lvec.  */
const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts
    | TODO_verify_flow
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_expand_vector_operations_ssa (); }
  unsigned int execute () { return expand_vector_operations (); }

}; // class pass_lower_vector

} // anon namespace

/* Factory used by the pass manager to instantiate the pass.  */
gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}
namespace {

/* Pass descriptor for the ungated "veclower2" pass, run later in the
   pipeline; unlike "veclower" it always executes (has_gate false).  */
const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  false, /* has_gate */
  true, /* has_execute */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts
    | TODO_verify_flow
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  /* Cloneable because the pass appears more than once in the pipeline.  */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  unsigned int execute () { return expand_vector_operations (); }

}; // class pass_lower_vector_ssa

} // anon namespace

/* Factory used by the pass manager to instantiate the pass.  */
gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"