Daily bump.
[official-gcc.git] / gcc / tree-vect-generic.c
blobb2a6944687c144619bee7deb8d97fb285e38dc21
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "tm.h"
25 #include "langhooks.h"
26 #include "tree-flow.h"
27 #include "gimple.h"
28 #include "tree-iterator.h"
29 #include "tree-pass.h"
30 #include "flags.h"
31 #include "ggc.h"
32 #include "diagnostic.h"
33 #include "target.h"
35 /* Need to include rtl.h, expr.h, etc. for optabs. */
36 #include "expr.h"
37 #include "optabs.h"
40 static void expand_vector_operations_1 (gimple_stmt_iterator *);
43 /* Build a constant of type TYPE, made of VALUE's bits replicated
44 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
45 static tree
46 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
48 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
49 int n = HOST_BITS_PER_WIDE_INT / width;
50 unsigned HOST_WIDE_INT low, high, mask;
51 tree ret;
53 gcc_assert (n);
55 if (width == HOST_BITS_PER_WIDE_INT)
56 low = value;
57 else
59 mask = ((HOST_WIDE_INT)1 << width) - 1;
60 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
63 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
64 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
65 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
66 high = 0;
67 else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
68 high = low;
69 else
70 gcc_unreachable ();
72 ret = build_int_cst_wide (type, low, high);
73 return ret;
76 static GTY(()) tree vector_inner_type;
77 static GTY(()) tree vector_last_type;
78 static GTY(()) int vector_last_nunits;
80 /* Return a suitable vector types made of SUBPARTS units each of mode
81 "word_mode" (the global variable). */
82 static tree
83 build_word_mode_vector_type (int nunits)
85 if (!vector_inner_type)
86 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
87 else if (vector_last_nunits == nunits)
89 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
90 return vector_last_type;
93 /* We build a new type, but we canonicalize it nevertheless,
94 because it still saves some memory. */
95 vector_last_nunits = nunits;
96 vector_last_type = type_hash_canon (nunits,
97 build_vector_type (vector_inner_type,
98 nunits));
99 return vector_last_type;
102 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
103 tree, tree, tree, tree, tree, enum tree_code);
105 static inline tree
106 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
107 tree t, tree bitsize, tree bitpos)
109 if (bitpos)
110 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
111 else
112 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
115 static tree
116 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
117 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
118 enum tree_code code)
120 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
121 return gimplify_build1 (gsi, code, inner_type, a);
124 static tree
125 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
126 tree bitpos, tree bitsize, enum tree_code code)
128 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
129 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
130 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
131 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
132 return gimplify_build2 (gsi, code, inner_type, a, b);
135 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
137 INNER_TYPE is the type of A and B elements
139 returned expression is of signed integer type with the
140 size equal to the size of INNER_TYPE. */
141 static tree
142 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
143 tree bitpos, tree bitsize, enum tree_code code)
145 tree comp_type;
147 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
148 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
150 comp_type = build_nonstandard_integer_type
151 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
153 return gimplify_build3 (gsi, COND_EXPR, comp_type,
154 fold_build2 (code, boolean_type_node, a, b),
155 build_int_cst (comp_type, -1),
156 build_int_cst (comp_type, 0));
159 /* Expand vector addition to scalars. This does bit twiddling
160 in order to increase parallelism:
162 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
163 (a ^ b) & 0x80808080
165 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
166 (a ^ ~b) & 0x80808080
168 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
170 This optimization should be done only if 4 vector items or more
171 fit into a word. */
172 static tree
173 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
174 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
175 enum tree_code code)
177 tree inner_type = TREE_TYPE (TREE_TYPE (a));
178 unsigned HOST_WIDE_INT max;
179 tree low_bits, high_bits, a_low, b_low, result_low, signs;
181 max = GET_MODE_MASK (TYPE_MODE (inner_type));
182 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
183 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
185 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
186 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
188 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
189 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
190 if (code == PLUS_EXPR)
191 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
192 else
194 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
195 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
198 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
199 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
200 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
203 static tree
204 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
205 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
206 tree bitsize ATTRIBUTE_UNUSED,
207 enum tree_code code ATTRIBUTE_UNUSED)
209 tree inner_type = TREE_TYPE (TREE_TYPE (b));
210 HOST_WIDE_INT max;
211 tree low_bits, high_bits, b_low, result_low, signs;
213 max = GET_MODE_MASK (TYPE_MODE (inner_type));
214 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
215 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
217 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
219 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
220 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
221 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
222 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
223 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
226 /* Expand a vector operation to scalars, by using many operations
227 whose type is the vector type's inner type. */
228 static tree
229 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
230 tree type, tree inner_type,
231 tree a, tree b, enum tree_code code)
233 vec<constructor_elt, va_gc> *v;
234 tree part_width = TYPE_SIZE (inner_type);
235 tree index = bitsize_int (0);
236 int nunits = TYPE_VECTOR_SUBPARTS (type);
237 int delta = tree_low_cst (part_width, 1)
238 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
239 int i;
240 location_t loc = gimple_location (gsi_stmt (*gsi));
242 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
243 warning_at (loc, OPT_Wvector_operation_performance,
244 "vector operation will be expanded piecewise");
245 else
246 warning_at (loc, OPT_Wvector_operation_performance,
247 "vector operation will be expanded in parallel");
249 vec_alloc (v, (nunits + delta - 1) / delta);
250 for (i = 0; i < nunits;
251 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
253 tree result = f (gsi, inner_type, a, b, index, part_width, code);
254 constructor_elt ce = {NULL_TREE, result};
255 v->quick_push (ce);
258 return build_constructor (type, v);
261 /* Expand a vector operation to scalars with the freedom to use
262 a scalar integer type, or to use a different size for the items
263 in the vector type. */
264 static tree
265 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
266 tree a, tree b,
267 enum tree_code code)
269 tree result, compute_type;
270 enum machine_mode mode;
271 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
272 location_t loc = gimple_location (gsi_stmt (*gsi));
274 /* We have three strategies. If the type is already correct, just do
275 the operation an element at a time. Else, if the vector is wider than
276 one word, do it a word at a time; finally, if the vector is smaller
277 than one word, do it as a scalar. */
278 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
279 return expand_vector_piecewise (gsi, f,
280 type, TREE_TYPE (type),
281 a, b, code);
282 else if (n_words > 1)
284 tree word_type = build_word_mode_vector_type (n_words);
285 result = expand_vector_piecewise (gsi, f,
286 word_type, TREE_TYPE (word_type),
287 a, b, code);
288 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
289 GSI_SAME_STMT);
291 else
293 /* Use a single scalar operation with a mode no wider than word_mode. */
294 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
295 compute_type = lang_hooks.types.type_for_mode (mode, 1);
296 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
297 warning_at (loc, OPT_Wvector_operation_performance,
298 "vector operation will be expanded with a "
299 "single scalar operation");
302 return result;
305 /* Expand a vector operation to scalars; for integer types we can use
306 special bit twiddling tricks to do the sums a word at a time, using
307 function F_PARALLEL instead of F. These tricks are done only if
308 they can process at least four items, that is, only if the vector
309 holds at least four items and if a word can hold four items. */
310 static tree
311 expand_vector_addition (gimple_stmt_iterator *gsi,
312 elem_op_func f, elem_op_func f_parallel,
313 tree type, tree a, tree b, enum tree_code code)
315 int parts_per_word = UNITS_PER_WORD
316 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
318 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
319 && parts_per_word >= 4
320 && TYPE_VECTOR_SUBPARTS (type) >= 4)
321 return expand_vector_parallel (gsi, f_parallel,
322 type, a, b, code);
323 else
324 return expand_vector_piecewise (gsi, f,
325 type, TREE_TYPE (type),
326 a, b, code);
329 /* Try to expand vector comparison expression OP0 CODE OP1 by
330 querying optab if the following expression:
331 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
332 can be expanded. */
333 static tree
334 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
335 tree op1, enum tree_code code)
337 tree t;
338 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
339 t = expand_vector_piecewise (gsi, do_compare, type,
340 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
341 else
342 t = NULL_TREE;
344 return t;
347 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
348 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
349 the result if successful, otherwise return NULL_TREE. */
350 static tree
351 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
353 optab op;
354 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
355 bool scalar_shift = true;
357 for (i = 1; i < nunits; i++)
359 if (shiftcnts[i] != shiftcnts[0])
360 scalar_shift = false;
363 if (scalar_shift && shiftcnts[0] == 0)
364 return op0;
366 if (scalar_shift)
368 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
369 if (op != unknown_optab
370 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
371 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
372 build_int_cst (NULL_TREE, shiftcnts[0]));
375 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
376 if (op != unknown_optab
377 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
379 tree *vec = XALLOCAVEC (tree, nunits);
380 for (i = 0; i < nunits; i++)
381 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
382 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
383 build_vector (type, vec));
386 return NULL_TREE;
389 /* Try to expand integer vector division by constant using
390 widening multiply, shifts and additions. */
391 static tree
392 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
393 tree op1, enum tree_code code)
395 bool use_pow2 = true;
396 bool has_vector_shift = true;
397 int mode = -1, this_mode;
398 int pre_shift = -1, post_shift;
399 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
400 int *shifts = XALLOCAVEC (int, nunits * 4);
401 int *pre_shifts = shifts + nunits;
402 int *post_shifts = pre_shifts + nunits;
403 int *shift_temps = post_shifts + nunits;
404 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
405 int prec = TYPE_PRECISION (TREE_TYPE (type));
406 int dummy_int;
407 unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
408 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
409 tree *vec;
410 tree cur_op, mulcst, tem;
411 optab op;
413 if (prec > HOST_BITS_PER_WIDE_INT)
414 return NULL_TREE;
416 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
417 if (op == unknown_optab
418 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
419 has_vector_shift = false;
421 /* Analysis phase. Determine if all op1 elements are either power
422 of two and it is possible to expand it using shifts (or for remainder
423 using masking). Additionally compute the multiplicative constants
424 and pre and post shifts if the division is to be expanded using
425 widening or high part multiplication plus shifts. */
426 for (i = 0; i < nunits; i++)
428 tree cst = VECTOR_CST_ELT (op1, i);
429 unsigned HOST_WIDE_INT ml;
431 if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
432 return NULL_TREE;
433 pre_shifts[i] = 0;
434 post_shifts[i] = 0;
435 mulc[i] = 0;
436 if (use_pow2
437 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
438 use_pow2 = false;
439 if (use_pow2)
441 shifts[i] = tree_log2 (cst);
442 if (shifts[i] != shifts[0]
443 && code == TRUNC_DIV_EXPR
444 && !has_vector_shift)
445 use_pow2 = false;
447 if (mode == -2)
448 continue;
449 if (unsignedp)
451 unsigned HOST_WIDE_INT mh;
452 unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;
454 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
455 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
456 return NULL_TREE;
458 if (d <= 1)
460 mode = -2;
461 continue;
464 /* Find a suitable multiplier and right shift count
465 instead of multiplying with D. */
466 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
468 /* If the suggested multiplier is more than SIZE bits, we can
469 do better for even divisors, using an initial right shift. */
470 if ((mh != 0 && (d & 1) == 0)
471 || (!has_vector_shift && pre_shift != -1))
473 if (has_vector_shift)
474 pre_shift = floor_log2 (d & -d);
475 else if (pre_shift == -1)
477 unsigned int j;
478 for (j = 0; j < nunits; j++)
480 tree cst2 = VECTOR_CST_ELT (op1, j);
481 unsigned HOST_WIDE_INT d2;
482 int this_pre_shift;
484 if (!host_integerp (cst2, 1))
485 return NULL_TREE;
486 d2 = tree_low_cst (cst2, 1) & mask;
487 if (d2 == 0)
488 return NULL_TREE;
489 this_pre_shift = floor_log2 (d2 & -d2);
490 if (pre_shift == -1 || this_pre_shift < pre_shift)
491 pre_shift = this_pre_shift;
493 if (i != 0 && pre_shift != 0)
495 /* Restart. */
496 i = -1U;
497 mode = -1;
498 continue;
501 if (pre_shift != 0)
503 if ((d >> pre_shift) <= 1)
505 mode = -2;
506 continue;
508 mh = choose_multiplier (d >> pre_shift, prec,
509 prec - pre_shift,
510 &ml, &post_shift, &dummy_int);
511 gcc_assert (!mh);
512 pre_shifts[i] = pre_shift;
515 if (!mh)
516 this_mode = 0;
517 else
518 this_mode = 1;
520 else
522 HOST_WIDE_INT d = tree_low_cst (cst, 0);
523 unsigned HOST_WIDE_INT abs_d;
525 if (d == -1)
526 return NULL_TREE;
528 /* Since d might be INT_MIN, we have to cast to
529 unsigned HOST_WIDE_INT before negating to avoid
530 undefined signed overflow. */
531 abs_d = (d >= 0
532 ? (unsigned HOST_WIDE_INT) d
533 : - (unsigned HOST_WIDE_INT) d);
535 /* n rem d = n rem -d */
536 if (code == TRUNC_MOD_EXPR && d < 0)
537 d = abs_d;
538 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
540 /* This case is not handled correctly below. */
541 mode = -2;
542 continue;
544 if (abs_d <= 1)
546 mode = -2;
547 continue;
550 choose_multiplier (abs_d, prec, prec - 1, &ml,
551 &post_shift, &dummy_int);
552 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
554 this_mode = 4 + (d < 0);
555 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
557 else
558 this_mode = 2 + (d < 0);
560 mulc[i] = ml;
561 post_shifts[i] = post_shift;
562 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
563 || post_shift >= prec
564 || pre_shifts[i] >= prec)
565 this_mode = -2;
567 if (i == 0)
568 mode = this_mode;
569 else if (mode != this_mode)
570 mode = -2;
573 vec = XALLOCAVEC (tree, nunits);
575 if (use_pow2)
577 tree addend = NULL_TREE;
578 if (!unsignedp)
580 tree uns_type;
582 /* Both division and remainder sequences need
583 op0 < 0 ? mask : 0 computed. It can be either computed as
584 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
585 if none of the shifts is 0, or as the conditional. */
586 for (i = 0; i < nunits; i++)
587 if (shifts[i] == 0)
588 break;
589 uns_type
590 = build_vector_type (build_nonstandard_integer_type (prec, 1),
591 nunits);
592 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
594 for (i = 0; i < nunits; i++)
595 shift_temps[i] = prec - 1;
596 cur_op = add_rshift (gsi, type, op0, shift_temps);
597 if (cur_op != NULL_TREE)
599 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
600 uns_type, cur_op);
601 for (i = 0; i < nunits; i++)
602 shift_temps[i] = prec - shifts[i];
603 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
604 if (cur_op != NULL_TREE)
605 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
606 type, cur_op);
609 if (addend == NULL_TREE
610 && expand_vec_cond_expr_p (type, type))
612 tree zero, cst, cond;
613 gimple stmt;
615 zero = build_zero_cst (type);
616 cond = build2 (LT_EXPR, type, op0, zero);
617 for (i = 0; i < nunits; i++)
618 vec[i] = build_int_cst (TREE_TYPE (type),
619 ((unsigned HOST_WIDE_INT) 1
620 << shifts[i]) - 1);
621 cst = build_vector (type, vec);
622 addend = make_ssa_name (type, NULL);
623 stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
624 cond, cst, zero);
625 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
628 if (code == TRUNC_DIV_EXPR)
630 if (unsignedp)
632 /* q = op0 >> shift; */
633 cur_op = add_rshift (gsi, type, op0, shifts);
634 if (cur_op != NULL_TREE)
635 return cur_op;
637 else if (addend != NULL_TREE)
639 /* t1 = op0 + addend;
640 q = t1 >> shift; */
641 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
642 if (op != unknown_optab
643 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
645 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
646 cur_op = add_rshift (gsi, type, cur_op, shifts);
647 if (cur_op != NULL_TREE)
648 return cur_op;
652 else
654 tree mask;
655 for (i = 0; i < nunits; i++)
656 vec[i] = build_int_cst (TREE_TYPE (type),
657 ((unsigned HOST_WIDE_INT) 1
658 << shifts[i]) - 1);
659 mask = build_vector (type, vec);
660 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
661 if (op != unknown_optab
662 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
664 if (unsignedp)
665 /* r = op0 & mask; */
666 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
667 else if (addend != NULL_TREE)
669 /* t1 = op0 + addend;
670 t2 = t1 & mask;
671 r = t2 - addend; */
672 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
673 if (op != unknown_optab
674 && optab_handler (op, TYPE_MODE (type))
675 != CODE_FOR_nothing)
677 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
678 addend);
679 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
680 cur_op, mask);
681 op = optab_for_tree_code (MINUS_EXPR, type,
682 optab_default);
683 if (op != unknown_optab
684 && optab_handler (op, TYPE_MODE (type))
685 != CODE_FOR_nothing)
686 return gimplify_build2 (gsi, MINUS_EXPR, type,
687 cur_op, addend);
694 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
695 return NULL_TREE;
697 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
698 return NULL_TREE;
700 cur_op = op0;
702 switch (mode)
704 case 0:
705 gcc_assert (unsignedp);
706 /* t1 = oprnd0 >> pre_shift;
707 t2 = t1 h* ml;
708 q = t2 >> post_shift; */
709 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
710 if (cur_op == NULL_TREE)
711 return NULL_TREE;
712 break;
713 case 1:
714 gcc_assert (unsignedp);
715 for (i = 0; i < nunits; i++)
717 shift_temps[i] = 1;
718 post_shifts[i]--;
720 break;
721 case 2:
722 case 3:
723 case 4:
724 case 5:
725 gcc_assert (!unsignedp);
726 for (i = 0; i < nunits; i++)
727 shift_temps[i] = prec - 1;
728 break;
729 default:
730 return NULL_TREE;
733 for (i = 0; i < nunits; i++)
734 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
735 mulcst = build_vector (type, vec);
737 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
739 switch (mode)
741 case 0:
742 /* t1 = oprnd0 >> pre_shift;
743 t2 = t1 h* ml;
744 q = t2 >> post_shift; */
745 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
746 break;
747 case 1:
748 /* t1 = oprnd0 h* ml;
749 t2 = oprnd0 - t1;
750 t3 = t2 >> 1;
751 t4 = t1 + t3;
752 q = t4 >> (post_shift - 1); */
753 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
754 if (op == unknown_optab
755 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
756 return NULL_TREE;
757 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
758 tem = add_rshift (gsi, type, tem, shift_temps);
759 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
760 if (op == unknown_optab
761 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
762 return NULL_TREE;
763 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
764 cur_op = add_rshift (gsi, type, tem, post_shifts);
765 if (cur_op == NULL_TREE)
766 return NULL_TREE;
767 break;
768 case 2:
769 case 3:
770 case 4:
771 case 5:
772 /* t1 = oprnd0 h* ml;
773 t2 = t1; [ iff (mode & 2) != 0 ]
774 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
775 t3 = t2 >> post_shift;
776 t4 = oprnd0 >> (prec - 1);
777 q = t3 - t4; [ iff (mode & 1) == 0 ]
778 q = t4 - t3; [ iff (mode & 1) != 0 ] */
779 if ((mode & 2) == 0)
781 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
782 if (op == unknown_optab
783 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
784 return NULL_TREE;
785 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
787 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
788 if (cur_op == NULL_TREE)
789 return NULL_TREE;
790 tem = add_rshift (gsi, type, op0, shift_temps);
791 if (tem == NULL_TREE)
792 return NULL_TREE;
793 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
794 if (op == unknown_optab
795 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
796 return NULL_TREE;
797 if ((mode & 1) == 0)
798 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
799 else
800 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
801 break;
802 default:
803 gcc_unreachable ();
806 if (code == TRUNC_DIV_EXPR)
807 return cur_op;
809 /* We divided. Now finish by:
810 t1 = q * oprnd1;
811 r = oprnd0 - t1; */
812 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
813 if (op == unknown_optab
814 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
815 return NULL_TREE;
816 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
817 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
818 if (op == unknown_optab
819 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
820 return NULL_TREE;
821 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
824 /* Expand a vector condition to scalars, by using many conditions
825 on the vector's elements. */
826 static void
827 expand_vector_condition (gimple_stmt_iterator *gsi)
829 gimple stmt = gsi_stmt (*gsi);
830 tree type = gimple_expr_type (stmt);
831 tree a = gimple_assign_rhs1 (stmt);
832 tree a1 = a;
833 tree a2;
834 bool a_is_comparison = false;
835 tree b = gimple_assign_rhs2 (stmt);
836 tree c = gimple_assign_rhs3 (stmt);
837 vec<constructor_elt, va_gc> *v;
838 tree constr;
839 tree inner_type = TREE_TYPE (type);
840 tree cond_type = TREE_TYPE (TREE_TYPE (a));
841 tree comp_inner_type = cond_type;
842 tree width = TYPE_SIZE (inner_type);
843 tree index = bitsize_int (0);
844 int nunits = TYPE_VECTOR_SUBPARTS (type);
845 int i;
846 location_t loc = gimple_location (gsi_stmt (*gsi));
848 if (!is_gimple_val (a))
850 gcc_assert (COMPARISON_CLASS_P (a));
851 a_is_comparison = true;
852 a1 = TREE_OPERAND (a, 0);
853 a2 = TREE_OPERAND (a, 1);
854 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
857 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
858 return;
860 /* TODO: try and find a smaller vector type. */
862 warning_at (loc, OPT_Wvector_operation_performance,
863 "vector condition will be expanded piecewise");
865 vec_alloc (v, nunits);
866 for (i = 0; i < nunits;
867 i++, index = int_const_binop (PLUS_EXPR, index, width))
869 tree aa, result;
870 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
871 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
872 if (a_is_comparison)
874 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
875 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
876 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
878 else
879 aa = tree_vec_extract (gsi, cond_type, a, width, index);
880 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
881 constructor_elt ce = {NULL_TREE, result};
882 v->quick_push (ce);
885 constr = build_constructor (type, v);
886 gimple_assign_set_rhs_from_tree (gsi, constr);
887 update_stmt (gsi_stmt (*gsi));
890 static tree
891 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
892 gimple assign, enum tree_code code)
894 enum machine_mode compute_mode = TYPE_MODE (compute_type);
896 /* If the compute mode is not a vector mode (hence we are not decomposing
897 a BLKmode vector to smaller, hardware-supported vectors), we may want
898 to expand the operations in parallel. */
899 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
900 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
901 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
902 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
903 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
904 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
905 switch (code)
907 case PLUS_EXPR:
908 case MINUS_EXPR:
909 if (!TYPE_OVERFLOW_TRAPS (type))
910 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
911 gimple_assign_rhs1 (assign),
912 gimple_assign_rhs2 (assign), code);
913 break;
915 case NEGATE_EXPR:
916 if (!TYPE_OVERFLOW_TRAPS (type))
917 return expand_vector_addition (gsi, do_unop, do_negate, type,
918 gimple_assign_rhs1 (assign),
919 NULL_TREE, code);
920 break;
922 case BIT_AND_EXPR:
923 case BIT_IOR_EXPR:
924 case BIT_XOR_EXPR:
925 return expand_vector_parallel (gsi, do_binop, type,
926 gimple_assign_rhs1 (assign),
927 gimple_assign_rhs2 (assign), code);
929 case BIT_NOT_EXPR:
930 return expand_vector_parallel (gsi, do_unop, type,
931 gimple_assign_rhs1 (assign),
932 NULL_TREE, code);
933 case EQ_EXPR:
934 case NE_EXPR:
935 case GT_EXPR:
936 case LT_EXPR:
937 case GE_EXPR:
938 case LE_EXPR:
939 case UNEQ_EXPR:
940 case UNGT_EXPR:
941 case UNLT_EXPR:
942 case UNGE_EXPR:
943 case UNLE_EXPR:
944 case LTGT_EXPR:
945 case ORDERED_EXPR:
946 case UNORDERED_EXPR:
948 tree rhs1 = gimple_assign_rhs1 (assign);
949 tree rhs2 = gimple_assign_rhs2 (assign);
951 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
954 case TRUNC_DIV_EXPR:
955 case TRUNC_MOD_EXPR:
957 tree rhs1 = gimple_assign_rhs1 (assign);
958 tree rhs2 = gimple_assign_rhs2 (assign);
959 tree ret;
961 if (!optimize
962 || !VECTOR_INTEGER_TYPE_P (type)
963 || TREE_CODE (rhs2) != VECTOR_CST)
964 break;
966 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
967 if (ret != NULL_TREE)
968 return ret;
969 break;
972 default:
973 break;
976 if (TREE_CODE_CLASS (code) == tcc_unary)
977 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
978 gimple_assign_rhs1 (assign),
979 NULL_TREE, code);
980 else
981 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
982 gimple_assign_rhs1 (assign),
983 gimple_assign_rhs2 (assign), code);
986 /* Return a type for the widest vector mode whose components are of type
987 TYPE, or NULL_TREE if none is found. */
989 static tree
990 type_for_widest_vector_mode (tree type, optab op)
992 enum machine_mode inner_mode = TYPE_MODE (type);
993 enum machine_mode best_mode = VOIDmode, mode;
994 int best_nunits = 0;
996 if (SCALAR_FLOAT_MODE_P (inner_mode))
997 mode = MIN_MODE_VECTOR_FLOAT;
998 else if (SCALAR_FRACT_MODE_P (inner_mode))
999 mode = MIN_MODE_VECTOR_FRACT;
1000 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1001 mode = MIN_MODE_VECTOR_UFRACT;
1002 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1003 mode = MIN_MODE_VECTOR_ACCUM;
1004 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1005 mode = MIN_MODE_VECTOR_UACCUM;
1006 else
1007 mode = MIN_MODE_VECTOR_INT;
1009 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1010 if (GET_MODE_INNER (mode) == inner_mode
1011 && GET_MODE_NUNITS (mode) > best_nunits
1012 && optab_handler (op, mode) != CODE_FOR_nothing)
1013 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1015 if (best_mode == VOIDmode)
1016 return NULL_TREE;
1017 else
1018 return build_vector_type_for_mode (type, best_mode);
1022 /* Build a reference to the element of the vector VECT. Function
1023 returns either the element itself, either BIT_FIELD_REF, or an
1024 ARRAY_REF expression.
1026 GSI is required to insert temporary variables while building a
1027 refernece to the element of the vector VECT.
1029 PTMPVEC is a pointer to the temporary variable for caching
1030 purposes. In case when PTMPVEC is NULL new temporary variable
1031 will be created. */
1032 static tree
1033 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1035 tree vect_type, vect_elt_type;
1036 gimple asgn;
1037 tree tmpvec;
1038 tree arraytype;
1039 bool need_asgn = true;
1040 unsigned int elements;
1042 vect_type = TREE_TYPE (vect);
1043 vect_elt_type = TREE_TYPE (vect_type);
1044 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1046 if (TREE_CODE (idx) == INTEGER_CST)
1048 unsigned HOST_WIDE_INT index;
1050 /* Given that we're about to compute a binary modulus,
1051 we don't care about the high bits of the value. */
1052 index = TREE_INT_CST_LOW (idx);
1053 if (!host_integerp (idx, 1) || index >= elements)
1055 index &= elements - 1;
1056 idx = build_int_cst (TREE_TYPE (idx), index);
1059 /* When lowering a vector statement sequence do some easy
1060 simplification by looking through intermediate vector results. */
1061 if (TREE_CODE (vect) == SSA_NAME)
1063 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1064 if (is_gimple_assign (def_stmt)
1065 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1066 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1067 vect = gimple_assign_rhs1 (def_stmt);
1070 if (TREE_CODE (vect) == VECTOR_CST)
1071 return VECTOR_CST_ELT (vect, index);
1072 else if (TREE_CODE (vect) == CONSTRUCTOR
1073 && (CONSTRUCTOR_NELTS (vect) == 0
1074 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1075 != VECTOR_TYPE))
1077 if (index < CONSTRUCTOR_NELTS (vect))
1078 return CONSTRUCTOR_ELT (vect, index)->value;
1079 return build_zero_cst (vect_elt_type);
1081 else
1083 tree size = TYPE_SIZE (vect_elt_type);
1084 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1085 size);
1086 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1090 if (!ptmpvec)
1091 tmpvec = create_tmp_var (vect_type, "vectmp");
1092 else if (!*ptmpvec)
1093 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1094 else
1096 tmpvec = *ptmpvec;
1097 need_asgn = false;
1100 if (need_asgn)
1102 TREE_ADDRESSABLE (tmpvec) = 1;
1103 asgn = gimple_build_assign (tmpvec, vect);
1104 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1107 arraytype = build_array_type_nelts (vect_elt_type, elements);
1108 return build4 (ARRAY_REF, vect_elt_type,
1109 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1110 idx, NULL_TREE, NULL_TREE);
1113 /* Check if VEC_PERM_EXPR within the given setting is supported
1114 by hardware, or lower it piecewise.
1116 When VEC_PERM_EXPR has the same first and second operands:
1117 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1118 {v0[mask[0]], v0[mask[1]], ...}
1119 MASK and V0 must have the same number of elements.
1121 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1122 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1123 V0 and V1 must have the same type. MASK, V0, V1 must have the
1124 same number of arguments. */
1126 static void
1127 lower_vec_perm (gimple_stmt_iterator *gsi)
1129 gimple stmt = gsi_stmt (*gsi);
1130 tree mask = gimple_assign_rhs3 (stmt);
1131 tree vec0 = gimple_assign_rhs1 (stmt);
1132 tree vec1 = gimple_assign_rhs2 (stmt);
1133 tree vect_type = TREE_TYPE (vec0);
1134 tree mask_type = TREE_TYPE (mask);
1135 tree vect_elt_type = TREE_TYPE (vect_type);
1136 tree mask_elt_type = TREE_TYPE (mask_type);
1137 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1138 vec<constructor_elt, va_gc> *v;
1139 tree constr, t, si, i_val;
1140 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1141 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1142 location_t loc = gimple_location (gsi_stmt (*gsi));
1143 unsigned i;
1145 if (TREE_CODE (mask) == SSA_NAME)
1147 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1148 if (is_gimple_assign (def_stmt)
1149 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1150 mask = gimple_assign_rhs1 (def_stmt);
1153 if (TREE_CODE (mask) == VECTOR_CST)
1155 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1157 for (i = 0; i < elements; ++i)
1158 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1159 & (2 * elements - 1));
1161 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1163 gimple_assign_set_rhs3 (stmt, mask);
1164 update_stmt (stmt);
1165 return;
1168 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1169 return;
1171 warning_at (loc, OPT_Wvector_operation_performance,
1172 "vector shuffling operation will be expanded piecewise");
1174 vec_alloc (v, elements);
1175 for (i = 0; i < elements; i++)
1177 si = size_int (i);
1178 i_val = vector_element (gsi, mask, si, &masktmp);
1180 if (TREE_CODE (i_val) == INTEGER_CST)
1182 unsigned HOST_WIDE_INT index;
1184 index = TREE_INT_CST_LOW (i_val);
1185 if (!host_integerp (i_val, 1) || index >= elements)
1186 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1188 if (two_operand_p && (index & elements) != 0)
1189 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1190 else
1191 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1193 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1194 true, GSI_SAME_STMT);
1196 else
1198 tree cond = NULL_TREE, v0_val;
1200 if (two_operand_p)
1202 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1203 build_int_cst (mask_elt_type, elements));
1204 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1205 true, GSI_SAME_STMT);
1208 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1209 build_int_cst (mask_elt_type, elements - 1));
1210 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1211 true, GSI_SAME_STMT);
1213 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1214 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1215 true, GSI_SAME_STMT);
1217 if (two_operand_p)
1219 tree v1_val;
1221 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1222 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1223 true, GSI_SAME_STMT);
1225 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1226 cond, build_zero_cst (mask_elt_type));
1227 cond = fold_build3 (COND_EXPR, vect_elt_type,
1228 cond, v0_val, v1_val);
1229 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1230 true, GSI_SAME_STMT);
1232 else
1233 t = v0_val;
1236 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1239 constr = build_constructor (vect_type, v);
1240 gimple_assign_set_rhs_from_tree (gsi, constr);
1241 update_stmt (gsi_stmt (*gsi));
1244 /* Process one statement. If we identify a vector operation, expand it. */
1246 static void
1247 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1249 gimple stmt = gsi_stmt (*gsi);
1250 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
1251 enum tree_code code;
1252 enum machine_mode compute_mode;
1253 optab op = unknown_optab;
1254 enum gimple_rhs_class rhs_class;
1255 tree new_rhs;
1257 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1258 return;
1260 code = gimple_assign_rhs_code (stmt);
1261 rhs_class = get_gimple_rhs_class (code);
1262 lhs = gimple_assign_lhs (stmt);
1264 if (code == VEC_PERM_EXPR)
1266 lower_vec_perm (gsi);
1267 return;
1270 if (code == VEC_COND_EXPR)
1272 expand_vector_condition (gsi);
1273 return;
1275 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1276 return;
1278 rhs1 = gimple_assign_rhs1 (stmt);
1279 type = gimple_expr_type (stmt);
1280 if (rhs_class == GIMPLE_BINARY_RHS)
1281 rhs2 = gimple_assign_rhs2 (stmt);
1283 if (TREE_CODE (type) != VECTOR_TYPE)
1284 return;
1286 if (code == NOP_EXPR
1287 || code == FLOAT_EXPR
1288 || code == FIX_TRUNC_EXPR
1289 || code == VIEW_CONVERT_EXPR)
1290 return;
1292 gcc_assert (code != CONVERT_EXPR);
1294 /* The signedness is determined from input argument. */
1295 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1296 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1297 type = TREE_TYPE (rhs1);
1299 /* For widening/narrowing vector operations, the relevant type is of the
1300 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1301 calculated in the same way above. */
1302 if (code == WIDEN_SUM_EXPR
1303 || code == VEC_WIDEN_MULT_HI_EXPR
1304 || code == VEC_WIDEN_MULT_LO_EXPR
1305 || code == VEC_WIDEN_MULT_EVEN_EXPR
1306 || code == VEC_WIDEN_MULT_ODD_EXPR
1307 || code == VEC_UNPACK_HI_EXPR
1308 || code == VEC_UNPACK_LO_EXPR
1309 || code == VEC_PACK_TRUNC_EXPR
1310 || code == VEC_PACK_SAT_EXPR
1311 || code == VEC_PACK_FIX_TRUNC_EXPR
1312 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1313 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1314 type = TREE_TYPE (rhs1);
1316 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1317 scalar */
1318 if (code == LSHIFT_EXPR
1319 || code == RSHIFT_EXPR
1320 || code == LROTATE_EXPR
1321 || code == RROTATE_EXPR)
1323 optab opv;
1325 /* Check whether we have vector <op> {x,x,x,x} where x
1326 could be a scalar variable or a constant. Transform
1327 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1328 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1330 tree first;
1331 gimple def_stmt;
1333 if ((TREE_CODE (rhs2) == VECTOR_CST
1334 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1335 || (TREE_CODE (rhs2) == SSA_NAME
1336 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1337 && gimple_assign_single_p (def_stmt)
1338 && (first = uniform_vector_p
1339 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1341 gimple_assign_set_rhs2 (stmt, first);
1342 update_stmt (stmt);
1343 rhs2 = first;
1347 opv = optab_for_tree_code (code, type, optab_vector);
1348 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1349 op = opv;
1350 else
1352 op = optab_for_tree_code (code, type, optab_scalar);
1354 /* The rtl expander will expand vector/scalar as vector/vector
1355 if necessary. Don't bother converting the stmt here. */
1356 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
1357 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
1358 return;
1361 else
1362 op = optab_for_tree_code (code, type, optab_default);
1364 /* Optabs will try converting a negation into a subtraction, so
1365 look for it as well. TODO: negation of floating-point vectors
1366 might be turned into an exclusive OR toggling the sign bit. */
1367 if (op == unknown_optab
1368 && code == NEGATE_EXPR
1369 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1370 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1372 /* For very wide vectors, try using a smaller vector mode. */
1373 compute_type = type;
1374 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
1376 tree vector_compute_type
1377 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1378 if (vector_compute_type != NULL_TREE
1379 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1380 < TYPE_VECTOR_SUBPARTS (compute_type))
1381 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1382 != CODE_FOR_nothing))
1383 compute_type = vector_compute_type;
1386 /* If we are breaking a BLKmode vector into smaller pieces,
1387 type_for_widest_vector_mode has already looked into the optab,
1388 so skip these checks. */
1389 if (compute_type == type)
1391 compute_mode = TYPE_MODE (compute_type);
1392 if (VECTOR_MODE_P (compute_mode))
1394 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1395 return;
1396 if (code == MULT_HIGHPART_EXPR
1397 && can_mult_highpart_p (compute_mode,
1398 TYPE_UNSIGNED (compute_type)))
1399 return;
1401 /* There is no operation in hardware, so fall back to scalars. */
1402 compute_type = TREE_TYPE (type);
1405 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
1406 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1408 /* Leave expression untouched for later expansion. */
1409 if (new_rhs == NULL_TREE)
1410 return;
1412 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1413 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1414 new_rhs);
1416 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1417 way to do it is change expand_vector_operation and its callees to
1418 return a tree_code, RHS1 and RHS2 instead of a tree. */
1419 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1420 update_stmt (gsi_stmt (*gsi));
1423 /* Use this to lower vector operations introduced by the vectorizer,
1424 if it may need the bit-twiddling tricks implemented in this file. */
1426 static bool
1427 gate_expand_vector_operations_ssa (void)
1429 return !(cfun->curr_properties & PROP_gimple_lvec);
1432 static unsigned int
1433 expand_vector_operations (void)
1435 gimple_stmt_iterator gsi;
1436 basic_block bb;
1437 bool cfg_changed = false;
1439 FOR_EACH_BB (bb)
1441 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1443 expand_vector_operations_1 (&gsi);
1444 /* ??? If we do not cleanup EH then we will ICE in
1445 verification. But in reality we have created wrong-code
1446 as we did not properly transition EH info and edges to
1447 the piecewise computations. */
1448 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1449 && gimple_purge_dead_eh_edges (bb))
1450 cfg_changed = true;
1454 return cfg_changed ? TODO_cleanup_cfg : 0;
1457 namespace {
1459 const pass_data pass_data_lower_vector =
1461 GIMPLE_PASS, /* type */
1462 "veclower", /* name */
1463 OPTGROUP_VEC, /* optinfo_flags */
1464 true, /* has_gate */
1465 true, /* has_execute */
1466 TV_NONE, /* tv_id */
1467 PROP_cfg, /* properties_required */
1468 PROP_gimple_lvec, /* properties_provided */
1469 0, /* properties_destroyed */
1470 0, /* todo_flags_start */
1471 ( TODO_update_ssa | TODO_verify_ssa
1472 | TODO_verify_stmts
1473 | TODO_verify_flow
1474 | TODO_cleanup_cfg ), /* todo_flags_finish */
1477 class pass_lower_vector : public gimple_opt_pass
1479 public:
1480 pass_lower_vector(gcc::context *ctxt)
1481 : gimple_opt_pass(pass_data_lower_vector, ctxt)
1484 /* opt_pass methods: */
1485 bool gate () { return gate_expand_vector_operations_ssa (); }
1486 unsigned int execute () { return expand_vector_operations (); }
1488 }; // class pass_lower_vector
1490 } // anon namespace
1492 gimple_opt_pass *
1493 make_pass_lower_vector (gcc::context *ctxt)
1495 return new pass_lower_vector (ctxt);
1498 namespace {
1500 const pass_data pass_data_lower_vector_ssa =
1502 GIMPLE_PASS, /* type */
1503 "veclower2", /* name */
1504 OPTGROUP_VEC, /* optinfo_flags */
1505 false, /* has_gate */
1506 true, /* has_execute */
1507 TV_NONE, /* tv_id */
1508 PROP_cfg, /* properties_required */
1509 PROP_gimple_lvec, /* properties_provided */
1510 0, /* properties_destroyed */
1511 0, /* todo_flags_start */
1512 ( TODO_update_ssa | TODO_verify_ssa
1513 | TODO_verify_stmts
1514 | TODO_verify_flow
1515 | TODO_cleanup_cfg ), /* todo_flags_finish */
1518 class pass_lower_vector_ssa : public gimple_opt_pass
1520 public:
1521 pass_lower_vector_ssa(gcc::context *ctxt)
1522 : gimple_opt_pass(pass_data_lower_vector_ssa, ctxt)
1525 /* opt_pass methods: */
1526 opt_pass * clone () { return new pass_lower_vector_ssa (ctxt_); }
1527 unsigned int execute () { return expand_vector_operations (); }
1529 }; // class pass_lower_vector_ssa
1531 } // anon namespace
1533 gimple_opt_pass *
1534 make_pass_lower_vector_ssa (gcc::context *ctxt)
1536 return new pass_lower_vector_ssa (ctxt);
1539 #include "gt-tree-vect-generic.h"