1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "stor-layout.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimplify-me.h"
30 #include "gimple-ssa.h"
31 #include "tree-cfg.h"
32 #include "stringpool.h"
33 #include "tree-ssanames.h"
34 #include "tree-iterator.h"
35 #include "tree-pass.h"
36 #include "flags.h"
37 #include "ggc.h"
38 #include "diagnostic.h"
39 #include "target.h"
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
46 static void expand_vector_operations_1 (gimple_stmt_iterator *);
49 /* Build a constant of type TYPE, made of VALUE's bits replicated
50 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
51 static tree
52 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
54 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
55 int n = HOST_BITS_PER_WIDE_INT / width;
56 unsigned HOST_WIDE_INT low, high, mask;
57 tree ret;
59 gcc_assert (n);
61 if (width == HOST_BITS_PER_WIDE_INT)
62 low = value;
63 else
65 mask = ((HOST_WIDE_INT)1 << width) - 1;
66 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
69 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
70 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
71 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
72 high = 0;
73 else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
74 high = low;
75 else
76 gcc_unreachable ();
78 ret = build_int_cst_wide (type, low, high);
79 return ret;
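/* A minimal standalone sketch of the replication trick used above, assuming
   8-bit elements packed into a 32-bit word (the function name is illustrative
   only): dividing the all-ones word by the element mask yields 0x01010101, so
   multiplying by the masked value replicates it into every byte, e.g.
   0x7f -> 0x7f7f7f7f and 0x80 -> 0x80808080.  */
static unsigned int
replicate_byte_sketch (unsigned char value)
{
  unsigned int mask = (1u << 8) - 1;           /* one element's worth of bits */
  return 0xffffffffu / mask * (value & mask);  /* same computation as 'low' above */
}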
82 static GTY(()) tree vector_inner_type;
83 static GTY(()) tree vector_last_type;
84 static GTY(()) int vector_last_nunits;
/* Return a suitable vector type made of SUBPARTS units, each of mode
   "word_mode" (the global variable).  */
88 static tree
89 build_word_mode_vector_type (int nunits)
91 if (!vector_inner_type)
92 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
93 else if (vector_last_nunits == nunits)
95 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
96 return vector_last_type;
99 /* We build a new type, but we canonicalize it nevertheless,
100 because it still saves some memory. */
101 vector_last_nunits = nunits;
102 vector_last_type = type_hash_canon (nunits,
103 build_vector_type (vector_inner_type,
104 nunits));
105 return vector_last_type;
108 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
109 tree, tree, tree, tree, tree, enum tree_code);
111 static inline tree
112 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
113 tree t, tree bitsize, tree bitpos)
115 if (bitpos)
116 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
117 else
118 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
121 static tree
122 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
123 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
124 enum tree_code code)
126 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
127 return gimplify_build1 (gsi, code, inner_type, a);
130 static tree
131 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
132 tree bitpos, tree bitsize, enum tree_code code)
134 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
135 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
136 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
137 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
138 return gimplify_build2 (gsi, code, inner_type, a, b);
/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
   INNER_TYPE is the type of A's and B's elements; the returned
   expression is of a signed integer type whose size equals the size
   of INNER_TYPE.  */
147 static tree
148 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
149 tree bitpos, tree bitsize, enum tree_code code)
151 tree comp_type;
153 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
154 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
156 comp_type = build_nonstandard_integer_type
157 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
159 return gimplify_build3 (gsi, COND_EXPR, comp_type,
160 fold_build2 (code, boolean_type_node, a, b),
161 build_int_cst (comp_type, -1),
162 build_int_cst (comp_type, 0));
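/* A scalar sketch of what one lowered comparison element computes, assuming
   32-bit elements (the function name is illustrative, not from the code
   above): the result is all-ones (-1) when the comparison holds and 0
   otherwise, matching the usual vector-comparison convention.  */
static int
compare_element_sketch (int a, int b)
{
  return a < b ? -1 : 0;   /* one element of e.g. a V4SI < V4SI compare */
}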
165 /* Expand vector addition to scalars. This does bit twiddling
166 in order to increase parallelism:
168 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
169 (a ^ b) & 0x80808080
171 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
172 (a ^ ~b) & 0x80808080
174 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
176 This optimization should be done only if 4 vector items or more
177 fit into a word. */
178 static tree
179 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
180 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
181 enum tree_code code)
183 tree inner_type = TREE_TYPE (TREE_TYPE (a));
184 unsigned HOST_WIDE_INT max;
185 tree low_bits, high_bits, a_low, b_low, result_low, signs;
187 max = GET_MODE_MASK (TYPE_MODE (inner_type));
188 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
189 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
191 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
192 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
194 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
195 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
196 if (code == PLUS_EXPR)
197 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
198 else
200 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
201 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
204 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
205 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
206 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
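/* A minimal sketch of the word-parallel addition formula above, assuming four
   8-bit lanes packed into a 32-bit word (name and types are illustrative
   only): the low 7 bits of every lane are added with the lane sign bits
   masked off so carries cannot cross lanes, and the correct sign bits are
   then patched back in with an XOR.  */
static unsigned int
add_u8x4_sketch (unsigned int a, unsigned int b)
{
  unsigned int low_bits  = 0x7f7f7f7fu;   /* what build_replicated_const builds */
  unsigned int high_bits = 0x80808080u;
  unsigned int result_low = (a & low_bits) + (b & low_bits);
  unsigned int signs = (a ^ b) & high_bits;
  return result_low ^ signs;
}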
209 static tree
210 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
211 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
212 tree bitsize ATTRIBUTE_UNUSED,
213 enum tree_code code ATTRIBUTE_UNUSED)
215 tree inner_type = TREE_TYPE (TREE_TYPE (b));
216 HOST_WIDE_INT max;
217 tree low_bits, high_bits, b_low, result_low, signs;
219 max = GET_MODE_MASK (TYPE_MODE (inner_type));
220 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
221 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
223 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
225 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
226 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
227 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
228 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
229 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
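/* A minimal sketch of the word-parallel negation formula above, assuming four
   8-bit lanes in a 32-bit word (illustrative only): each lane's magnitude is
   subtracted from 0x80, which cannot borrow into the neighbouring lane, and
   the sign bits are then fixed up with an XOR, matching
   -b = (0x80808080 - (b & 0x7f7f7f7f)) ^ (~b & 0x80808080).  */
static unsigned int
negate_u8x4_sketch (unsigned int b)
{
  unsigned int low_bits  = 0x7f7f7f7fu;
  unsigned int high_bits = 0x80808080u;
  unsigned int result_low = high_bits - (b & low_bits);
  unsigned int signs = ~b & high_bits;
  return result_low ^ signs;
}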
232 /* Expand a vector operation to scalars, by using many operations
233 whose type is the vector type's inner type. */
234 static tree
235 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
236 tree type, tree inner_type,
237 tree a, tree b, enum tree_code code)
239 vec<constructor_elt, va_gc> *v;
240 tree part_width = TYPE_SIZE (inner_type);
241 tree index = bitsize_int (0);
242 int nunits = TYPE_VECTOR_SUBPARTS (type);
243 int delta = tree_to_uhwi (part_width)
244 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
245 int i;
246 location_t loc = gimple_location (gsi_stmt (*gsi));
248 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
249 warning_at (loc, OPT_Wvector_operation_performance,
250 "vector operation will be expanded piecewise");
251 else
252 warning_at (loc, OPT_Wvector_operation_performance,
253 "vector operation will be expanded in parallel");
255 vec_alloc (v, (nunits + delta - 1) / delta);
256 for (i = 0; i < nunits;
257 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
259 tree result = f (gsi, inner_type, a, b, index, part_width, code);
260 constructor_elt ce = {NULL_TREE, result};
261 v->quick_push (ce);
264 return build_constructor (type, v);
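/* A rough sketch of what a piecewise expansion amounts to at run time,
   assuming a four-element integer vector held in plain arrays (types and
   names are illustrative): the single vector statement becomes one scalar
   operation per element, and the results are gathered back into a
   CONSTRUCTOR of the original vector type.  */
static void
piecewise_add_sketch (const int a[4], const int b[4], int result[4])
{
  int i;
  for (i = 0; i < 4; i++)
    result[i] = a[i] + b[i];   /* one do_binop call per element */
}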
267 /* Expand a vector operation to scalars with the freedom to use
268 a scalar integer type, or to use a different size for the items
269 in the vector type. */
270 static tree
271 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
272 tree a, tree b,
273 enum tree_code code)
275 tree result, compute_type;
276 enum machine_mode mode;
277 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
278 location_t loc = gimple_location (gsi_stmt (*gsi));
280 /* We have three strategies. If the type is already correct, just do
281 the operation an element at a time. Else, if the vector is wider than
282 one word, do it a word at a time; finally, if the vector is smaller
283 than one word, do it as a scalar. */
284 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
285 return expand_vector_piecewise (gsi, f,
286 type, TREE_TYPE (type),
287 a, b, code);
288 else if (n_words > 1)
290 tree word_type = build_word_mode_vector_type (n_words);
291 result = expand_vector_piecewise (gsi, f,
292 word_type, TREE_TYPE (word_type),
293 a, b, code);
294 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
295 GSI_SAME_STMT);
297 else
299 /* Use a single scalar operation with a mode no wider than word_mode. */
300 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
301 compute_type = lang_hooks.types.type_for_mode (mode, 1);
302 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
303 warning_at (loc, OPT_Wvector_operation_performance,
304 "vector operation will be expanded with a "
305 "single scalar operation");
308 return result;
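/* A sketch of the "parallel" strategy for bitwise operations, assuming a
   vector of four 8-bit elements that fits in one 32-bit word (names are
   illustrative): because AND, IOR and XOR never carry between bits, the
   whole vector can be processed with a single scalar operation on the
   containing word, which is what the word_mode paths above arrange.  */
static unsigned int
and_u8x4_sketch (unsigned int a, unsigned int b)
{
  return a & b;   /* all four lanes at once */
}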
311 /* Expand a vector operation to scalars; for integer types we can use
312 special bit twiddling tricks to do the sums a word at a time, using
313 function F_PARALLEL instead of F. These tricks are done only if
314 they can process at least four items, that is, only if the vector
315 holds at least four items and if a word can hold four items. */
316 static tree
317 expand_vector_addition (gimple_stmt_iterator *gsi,
318 elem_op_func f, elem_op_func f_parallel,
319 tree type, tree a, tree b, enum tree_code code)
321 int parts_per_word = UNITS_PER_WORD
322 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
324 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
325 && parts_per_word >= 4
326 && TYPE_VECTOR_SUBPARTS (type) >= 4)
327 return expand_vector_parallel (gsi, f_parallel,
328 type, a, b, code);
329 else
330 return expand_vector_piecewise (gsi, f,
331 type, TREE_TYPE (type),
332 a, b, code);
/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab to see whether the expression
     VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
339 static tree
340 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
341 tree op1, enum tree_code code)
343 tree t;
344 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
345 t = expand_vector_piecewise (gsi, do_compare, type,
346 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
347 else
348 t = NULL_TREE;
350 return t;
/* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in the
   type of OP0 with the shift counts in the SHIFTCNTS array, and return the
   temporary holding the result if successful, otherwise return NULL_TREE.  */
356 static tree
357 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
359 optab op;
360 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
361 bool scalar_shift = true;
363 for (i = 1; i < nunits; i++)
365 if (shiftcnts[i] != shiftcnts[0])
366 scalar_shift = false;
369 if (scalar_shift && shiftcnts[0] == 0)
370 return op0;
372 if (scalar_shift)
374 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
375 if (op != unknown_optab
376 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
377 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
378 build_int_cst (NULL_TREE, shiftcnts[0]));
381 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
382 if (op != unknown_optab
383 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
385 tree *vec = XALLOCAVEC (tree, nunits);
386 for (i = 0; i < nunits; i++)
387 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
388 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
389 build_vector (type, vec));
392 return NULL_TREE;
395 /* Try to expand integer vector division by constant using
396 widening multiply, shifts and additions. */
397 static tree
398 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
399 tree op1, enum tree_code code)
401 bool use_pow2 = true;
402 bool has_vector_shift = true;
403 int mode = -1, this_mode;
404 int pre_shift = -1, post_shift;
405 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
406 int *shifts = XALLOCAVEC (int, nunits * 4);
407 int *pre_shifts = shifts + nunits;
408 int *post_shifts = pre_shifts + nunits;
409 int *shift_temps = post_shifts + nunits;
410 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
411 int prec = TYPE_PRECISION (TREE_TYPE (type));
412 int dummy_int;
413 unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
414 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
415 tree *vec;
416 tree cur_op, mulcst, tem;
417 optab op;
419 if (prec > HOST_BITS_PER_WIDE_INT)
420 return NULL_TREE;
422 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
423 if (op == unknown_optab
424 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
425 has_vector_shift = false;
/* Analysis phase.  Determine whether all op1 elements are powers of two
   and whether the operation can be expanded using shifts (or, for the
   remainder, using masking).  Additionally compute the multiplicative
   constants and the pre- and post-shifts in case the division is to be
   expanded using a widening or high-part multiplication plus shifts.  */
432 for (i = 0; i < nunits; i++)
434 tree cst = VECTOR_CST_ELT (op1, i);
435 unsigned HOST_WIDE_INT ml;
437 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
438 return NULL_TREE;
439 pre_shifts[i] = 0;
440 post_shifts[i] = 0;
441 mulc[i] = 0;
442 if (use_pow2
443 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
444 use_pow2 = false;
445 if (use_pow2)
447 shifts[i] = tree_log2 (cst);
448 if (shifts[i] != shifts[0]
449 && code == TRUNC_DIV_EXPR
450 && !has_vector_shift)
451 use_pow2 = false;
453 if (mode == -2)
454 continue;
455 if (unsignedp)
457 unsigned HOST_WIDE_INT mh;
458 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
460 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
461 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
462 return NULL_TREE;
464 if (d <= 1)
466 mode = -2;
467 continue;
470 /* Find a suitable multiplier and right shift count
471 instead of multiplying with D. */
472 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
474 /* If the suggested multiplier is more than SIZE bits, we can
475 do better for even divisors, using an initial right shift. */
476 if ((mh != 0 && (d & 1) == 0)
477 || (!has_vector_shift && pre_shift != -1))
479 if (has_vector_shift)
480 pre_shift = floor_log2 (d & -d);
481 else if (pre_shift == -1)
483 unsigned int j;
484 for (j = 0; j < nunits; j++)
486 tree cst2 = VECTOR_CST_ELT (op1, j);
487 unsigned HOST_WIDE_INT d2;
488 int this_pre_shift;
490 if (!tree_fits_uhwi_p (cst2))
491 return NULL_TREE;
492 d2 = tree_to_uhwi (cst2) & mask;
493 if (d2 == 0)
494 return NULL_TREE;
495 this_pre_shift = floor_log2 (d2 & -d2);
496 if (pre_shift == -1 || this_pre_shift < pre_shift)
497 pre_shift = this_pre_shift;
499 if (i != 0 && pre_shift != 0)
501 /* Restart. */
502 i = -1U;
503 mode = -1;
504 continue;
507 if (pre_shift != 0)
509 if ((d >> pre_shift) <= 1)
511 mode = -2;
512 continue;
514 mh = choose_multiplier (d >> pre_shift, prec,
515 prec - pre_shift,
516 &ml, &post_shift, &dummy_int);
517 gcc_assert (!mh);
518 pre_shifts[i] = pre_shift;
521 if (!mh)
522 this_mode = 0;
523 else
524 this_mode = 1;
526 else
528 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
529 unsigned HOST_WIDE_INT abs_d;
531 if (d == -1)
532 return NULL_TREE;
534 /* Since d might be INT_MIN, we have to cast to
535 unsigned HOST_WIDE_INT before negating to avoid
536 undefined signed overflow. */
537 abs_d = (d >= 0
538 ? (unsigned HOST_WIDE_INT) d
539 : - (unsigned HOST_WIDE_INT) d);
541 /* n rem d = n rem -d */
542 if (code == TRUNC_MOD_EXPR && d < 0)
543 d = abs_d;
544 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
546 /* This case is not handled correctly below. */
547 mode = -2;
548 continue;
550 if (abs_d <= 1)
552 mode = -2;
553 continue;
556 choose_multiplier (abs_d, prec, prec - 1, &ml,
557 &post_shift, &dummy_int);
558 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
560 this_mode = 4 + (d < 0);
561 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
563 else
564 this_mode = 2 + (d < 0);
566 mulc[i] = ml;
567 post_shifts[i] = post_shift;
568 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
569 || post_shift >= prec
570 || pre_shifts[i] >= prec)
571 this_mode = -2;
573 if (i == 0)
574 mode = this_mode;
575 else if (mode != this_mode)
576 mode = -2;
579 vec = XALLOCAVEC (tree, nunits);
581 if (use_pow2)
583 tree addend = NULL_TREE;
584 if (!unsignedp)
586 tree uns_type;
588 /* Both division and remainder sequences need
589 op0 < 0 ? mask : 0 computed. It can be either computed as
590 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
591 if none of the shifts is 0, or as the conditional. */
592 for (i = 0; i < nunits; i++)
593 if (shifts[i] == 0)
594 break;
595 uns_type
596 = build_vector_type (build_nonstandard_integer_type (prec, 1),
597 nunits);
598 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
600 for (i = 0; i < nunits; i++)
601 shift_temps[i] = prec - 1;
602 cur_op = add_rshift (gsi, type, op0, shift_temps);
603 if (cur_op != NULL_TREE)
605 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
606 uns_type, cur_op);
607 for (i = 0; i < nunits; i++)
608 shift_temps[i] = prec - shifts[i];
609 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
610 if (cur_op != NULL_TREE)
611 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
612 type, cur_op);
615 if (addend == NULL_TREE
616 && expand_vec_cond_expr_p (type, type))
618 tree zero, cst, cond;
619 gimple stmt;
621 zero = build_zero_cst (type);
622 cond = build2 (LT_EXPR, type, op0, zero);
623 for (i = 0; i < nunits; i++)
624 vec[i] = build_int_cst (TREE_TYPE (type),
625 ((unsigned HOST_WIDE_INT) 1
626 << shifts[i]) - 1);
627 cst = build_vector (type, vec);
628 addend = make_ssa_name (type, NULL);
629 stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
630 cond, cst, zero);
631 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
634 if (code == TRUNC_DIV_EXPR)
636 if (unsignedp)
638 /* q = op0 >> shift; */
639 cur_op = add_rshift (gsi, type, op0, shifts);
640 if (cur_op != NULL_TREE)
641 return cur_op;
643 else if (addend != NULL_TREE)
645 /* t1 = op0 + addend;
646 q = t1 >> shift; */
647 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
648 if (op != unknown_optab
649 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
651 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
652 cur_op = add_rshift (gsi, type, cur_op, shifts);
653 if (cur_op != NULL_TREE)
654 return cur_op;
658 else
660 tree mask;
661 for (i = 0; i < nunits; i++)
662 vec[i] = build_int_cst (TREE_TYPE (type),
663 ((unsigned HOST_WIDE_INT) 1
664 << shifts[i]) - 1);
665 mask = build_vector (type, vec);
666 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
667 if (op != unknown_optab
668 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
670 if (unsignedp)
671 /* r = op0 & mask; */
672 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
673 else if (addend != NULL_TREE)
675 /* t1 = op0 + addend;
676 t2 = t1 & mask;
677 r = t2 - addend; */
678 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
679 if (op != unknown_optab
680 && optab_handler (op, TYPE_MODE (type))
681 != CODE_FOR_nothing)
683 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
684 addend);
685 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
686 cur_op, mask);
687 op = optab_for_tree_code (MINUS_EXPR, type,
688 optab_default);
689 if (op != unknown_optab
690 && optab_handler (op, TYPE_MODE (type))
691 != CODE_FOR_nothing)
692 return gimplify_build2 (gsi, MINUS_EXPR, type,
693 cur_op, addend);
700 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
701 return NULL_TREE;
703 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
704 return NULL_TREE;
706 cur_op = op0;
708 switch (mode)
710 case 0:
711 gcc_assert (unsignedp);
712 /* t1 = oprnd0 >> pre_shift;
713 t2 = t1 h* ml;
714 q = t2 >> post_shift; */
715 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
716 if (cur_op == NULL_TREE)
717 return NULL_TREE;
718 break;
719 case 1:
720 gcc_assert (unsignedp);
721 for (i = 0; i < nunits; i++)
723 shift_temps[i] = 1;
724 post_shifts[i]--;
726 break;
727 case 2:
728 case 3:
729 case 4:
730 case 5:
731 gcc_assert (!unsignedp);
732 for (i = 0; i < nunits; i++)
733 shift_temps[i] = prec - 1;
734 break;
735 default:
736 return NULL_TREE;
739 for (i = 0; i < nunits; i++)
740 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
741 mulcst = build_vector (type, vec);
743 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
745 switch (mode)
747 case 0:
748 /* t1 = oprnd0 >> pre_shift;
749 t2 = t1 h* ml;
750 q = t2 >> post_shift; */
751 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
752 break;
753 case 1:
754 /* t1 = oprnd0 h* ml;
755 t2 = oprnd0 - t1;
756 t3 = t2 >> 1;
757 t4 = t1 + t3;
758 q = t4 >> (post_shift - 1); */
759 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
760 if (op == unknown_optab
761 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
762 return NULL_TREE;
763 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
764 tem = add_rshift (gsi, type, tem, shift_temps);
765 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
766 if (op == unknown_optab
767 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
768 return NULL_TREE;
769 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
770 cur_op = add_rshift (gsi, type, tem, post_shifts);
771 if (cur_op == NULL_TREE)
772 return NULL_TREE;
773 break;
774 case 2:
775 case 3:
776 case 4:
777 case 5:
778 /* t1 = oprnd0 h* ml;
779 t2 = t1; [ iff (mode & 2) != 0 ]
780 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
781 t3 = t2 >> post_shift;
782 t4 = oprnd0 >> (prec - 1);
783 q = t3 - t4; [ iff (mode & 1) == 0 ]
784 q = t4 - t3; [ iff (mode & 1) != 0 ] */
785 if ((mode & 2) == 0)
787 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
788 if (op == unknown_optab
789 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
790 return NULL_TREE;
791 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
793 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
794 if (cur_op == NULL_TREE)
795 return NULL_TREE;
796 tem = add_rshift (gsi, type, op0, shift_temps);
797 if (tem == NULL_TREE)
798 return NULL_TREE;
799 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
800 if (op == unknown_optab
801 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
802 return NULL_TREE;
803 if ((mode & 1) == 0)
804 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
805 else
806 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
807 break;
808 default:
809 gcc_unreachable ();
812 if (code == TRUNC_DIV_EXPR)
813 return cur_op;
815 /* We divided. Now finish by:
816 t1 = q * oprnd1;
817 r = oprnd0 - t1; */
818 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
819 if (op == unknown_optab
820 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
821 return NULL_TREE;
822 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
823 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
824 if (op == unknown_optab
825 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
826 return NULL_TREE;
827 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
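/* A scalar sketch of the power-of-two path above for signed elements,
   assuming 32-bit elements and a positive divisor equal to 1 << SHIFT with
   1 <= SHIFT <= 31 (the function is illustrative only): ADDEND is
   (1 << shift) - 1 for negative operands and 0 otherwise, computed
   branchlessly from the sign bit as described in the comment above, so that
   the shift truncates toward zero; the remainder then follows as
   t2 - addend.  */
static void
pow2_divmod_sketch (int x, int shift, int *quot, int *rem)
{
  int mask = (int) ((1u << shift) - 1);
  int addend = (int) (((unsigned int) (x >> 31)) >> (32 - shift));
  *quot = (x + addend) >> shift;            /* t1 = x + addend; q = t1 >> shift */
  *rem = ((x + addend) & mask) - addend;    /* t2 = t1 & mask; r = t2 - addend */
}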
830 /* Expand a vector condition to scalars, by using many conditions
831 on the vector's elements. */
832 static void
833 expand_vector_condition (gimple_stmt_iterator *gsi)
835 gimple stmt = gsi_stmt (*gsi);
836 tree type = gimple_expr_type (stmt);
837 tree a = gimple_assign_rhs1 (stmt);
838 tree a1 = a;
839 tree a2;
840 bool a_is_comparison = false;
841 tree b = gimple_assign_rhs2 (stmt);
842 tree c = gimple_assign_rhs3 (stmt);
843 vec<constructor_elt, va_gc> *v;
844 tree constr;
845 tree inner_type = TREE_TYPE (type);
846 tree cond_type = TREE_TYPE (TREE_TYPE (a));
847 tree comp_inner_type = cond_type;
848 tree width = TYPE_SIZE (inner_type);
849 tree index = bitsize_int (0);
850 int nunits = TYPE_VECTOR_SUBPARTS (type);
851 int i;
852 location_t loc = gimple_location (gsi_stmt (*gsi));
854 if (!is_gimple_val (a))
856 gcc_assert (COMPARISON_CLASS_P (a));
857 a_is_comparison = true;
858 a1 = TREE_OPERAND (a, 0);
859 a2 = TREE_OPERAND (a, 1);
860 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
863 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
864 return;
866 /* TODO: try and find a smaller vector type. */
868 warning_at (loc, OPT_Wvector_operation_performance,
869 "vector condition will be expanded piecewise");
871 vec_alloc (v, nunits);
872 for (i = 0; i < nunits;
873 i++, index = int_const_binop (PLUS_EXPR, index, width))
875 tree aa, result;
876 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
877 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
878 if (a_is_comparison)
880 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
881 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
882 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
884 else
885 aa = tree_vec_extract (gsi, cond_type, a, width, index);
886 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
887 constructor_elt ce = {NULL_TREE, result};
888 v->quick_push (ce);
891 constr = build_constructor (type, v);
892 gimple_assign_set_rhs_from_tree (gsi, constr);
893 update_stmt (gsi_stmt (*gsi));
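/* A rough sketch of the piecewise lowering above, assuming a four-element
   vector selected by a comparison (arrays and names are illustrative): each
   output element is an ordinary scalar COND_EXPR on the corresponding
   elements of the condition operands and the two value operands.  */
static void
vec_cond_sketch (const int a1[4], const int a2[4],
                 const int b[4], const int c[4], int result[4])
{
  int i;
  for (i = 0; i < 4; i++)
    result[i] = (a1[i] < a2[i]) ? b[i] : c[i];
}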
896 static tree
897 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
898 gimple assign, enum tree_code code)
900 enum machine_mode compute_mode = TYPE_MODE (compute_type);
902 /* If the compute mode is not a vector mode (hence we are not decomposing
903 a BLKmode vector to smaller, hardware-supported vectors), we may want
904 to expand the operations in parallel. */
905 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
906 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
907 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
908 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
909 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
910 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
911 switch (code)
913 case PLUS_EXPR:
914 case MINUS_EXPR:
915 if (!TYPE_OVERFLOW_TRAPS (type))
916 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
917 gimple_assign_rhs1 (assign),
918 gimple_assign_rhs2 (assign), code);
919 break;
921 case NEGATE_EXPR:
922 if (!TYPE_OVERFLOW_TRAPS (type))
923 return expand_vector_addition (gsi, do_unop, do_negate, type,
924 gimple_assign_rhs1 (assign),
925 NULL_TREE, code);
926 break;
928 case BIT_AND_EXPR:
929 case BIT_IOR_EXPR:
930 case BIT_XOR_EXPR:
931 return expand_vector_parallel (gsi, do_binop, type,
932 gimple_assign_rhs1 (assign),
933 gimple_assign_rhs2 (assign), code);
935 case BIT_NOT_EXPR:
936 return expand_vector_parallel (gsi, do_unop, type,
937 gimple_assign_rhs1 (assign),
938 NULL_TREE, code);
939 case EQ_EXPR:
940 case NE_EXPR:
941 case GT_EXPR:
942 case LT_EXPR:
943 case GE_EXPR:
944 case LE_EXPR:
945 case UNEQ_EXPR:
946 case UNGT_EXPR:
947 case UNLT_EXPR:
948 case UNGE_EXPR:
949 case UNLE_EXPR:
950 case LTGT_EXPR:
951 case ORDERED_EXPR:
952 case UNORDERED_EXPR:
954 tree rhs1 = gimple_assign_rhs1 (assign);
955 tree rhs2 = gimple_assign_rhs2 (assign);
957 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
960 case TRUNC_DIV_EXPR:
961 case TRUNC_MOD_EXPR:
963 tree rhs1 = gimple_assign_rhs1 (assign);
964 tree rhs2 = gimple_assign_rhs2 (assign);
965 tree ret;
967 if (!optimize
968 || !VECTOR_INTEGER_TYPE_P (type)
969 || TREE_CODE (rhs2) != VECTOR_CST)
970 break;
972 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
973 if (ret != NULL_TREE)
974 return ret;
975 break;
978 default:
979 break;
982 if (TREE_CODE_CLASS (code) == tcc_unary)
983 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
984 gimple_assign_rhs1 (assign),
985 NULL_TREE, code);
986 else
987 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
988 gimple_assign_rhs1 (assign),
989 gimple_assign_rhs2 (assign), code);
992 /* Return a type for the widest vector mode whose components are of type
993 TYPE, or NULL_TREE if none is found. */
995 static tree
996 type_for_widest_vector_mode (tree type, optab op)
998 enum machine_mode inner_mode = TYPE_MODE (type);
999 enum machine_mode best_mode = VOIDmode, mode;
1000 int best_nunits = 0;
1002 if (SCALAR_FLOAT_MODE_P (inner_mode))
1003 mode = MIN_MODE_VECTOR_FLOAT;
1004 else if (SCALAR_FRACT_MODE_P (inner_mode))
1005 mode = MIN_MODE_VECTOR_FRACT;
1006 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1007 mode = MIN_MODE_VECTOR_UFRACT;
1008 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1009 mode = MIN_MODE_VECTOR_ACCUM;
1010 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1011 mode = MIN_MODE_VECTOR_UACCUM;
1012 else
1013 mode = MIN_MODE_VECTOR_INT;
1015 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1016 if (GET_MODE_INNER (mode) == inner_mode
1017 && GET_MODE_NUNITS (mode) > best_nunits
1018 && optab_handler (op, mode) != CODE_FOR_nothing)
1019 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1021 if (best_mode == VOIDmode)
1022 return NULL_TREE;
1023 else
1024 return build_vector_type_for_mode (type, best_mode);
/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an ARRAY_REF
   expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to a temporary variable used for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will be
   created.  */
1038 static tree
1039 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1041 tree vect_type, vect_elt_type;
1042 gimple asgn;
1043 tree tmpvec;
1044 tree arraytype;
1045 bool need_asgn = true;
1046 unsigned int elements;
1048 vect_type = TREE_TYPE (vect);
1049 vect_elt_type = TREE_TYPE (vect_type);
1050 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1052 if (TREE_CODE (idx) == INTEGER_CST)
1054 unsigned HOST_WIDE_INT index;
1056 /* Given that we're about to compute a binary modulus,
1057 we don't care about the high bits of the value. */
1058 index = TREE_INT_CST_LOW (idx);
1059 if (!tree_fits_uhwi_p (idx) || index >= elements)
1061 index &= elements - 1;
1062 idx = build_int_cst (TREE_TYPE (idx), index);
1065 /* When lowering a vector statement sequence do some easy
1066 simplification by looking through intermediate vector results. */
1067 if (TREE_CODE (vect) == SSA_NAME)
1069 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1070 if (is_gimple_assign (def_stmt)
1071 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1072 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1073 vect = gimple_assign_rhs1 (def_stmt);
1076 if (TREE_CODE (vect) == VECTOR_CST)
1077 return VECTOR_CST_ELT (vect, index);
1078 else if (TREE_CODE (vect) == CONSTRUCTOR
1079 && (CONSTRUCTOR_NELTS (vect) == 0
1080 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1081 != VECTOR_TYPE))
1083 if (index < CONSTRUCTOR_NELTS (vect))
1084 return CONSTRUCTOR_ELT (vect, index)->value;
1085 return build_zero_cst (vect_elt_type);
1087 else
1089 tree size = TYPE_SIZE (vect_elt_type);
1090 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1091 size);
1092 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1096 if (!ptmpvec)
1097 tmpvec = create_tmp_var (vect_type, "vectmp");
1098 else if (!*ptmpvec)
1099 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1100 else
1102 tmpvec = *ptmpvec;
1103 need_asgn = false;
1106 if (need_asgn)
1108 TREE_ADDRESSABLE (tmpvec) = 1;
1109 asgn = gimple_build_assign (tmpvec, vect);
1110 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1113 arraytype = build_array_type_nelts (vect_elt_type, elements);
1114 return build4 (ARRAY_REF, vect_elt_type,
1115 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1116 idx, NULL_TREE, NULL_TREE);
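/* A sketch of the variable-index fallback above, assuming a four-element
   integer vector (the struct and names are illustrative; the code above
   builds the equivalent VIEW_CONVERT_EXPR / ARRAY_REF trees): the vector is
   stored into an addressable temporary and then indexed as an ordinary
   array.  */
struct v4i_sketch { int e[4]; };

static int
vector_element_sketch (struct v4i_sketch vect, unsigned int idx)
{
  struct v4i_sketch tmpvec = vect;   /* addressable temporary, as above */
  return tmpvec.e[idx];              /* callers reduce IDX modulo the length */
}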
/* Check whether a VEC_PERM_EXPR in the given setting is supported
   by hardware, and lower it piecewise otherwise.

   When VEC_PERM_EXPR has the same first and second operands,
   VEC_PERM_EXPR <v0, v0, mask> is lowered to
     {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
     {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */
1132 static void
1133 lower_vec_perm (gimple_stmt_iterator *gsi)
1135 gimple stmt = gsi_stmt (*gsi);
1136 tree mask = gimple_assign_rhs3 (stmt);
1137 tree vec0 = gimple_assign_rhs1 (stmt);
1138 tree vec1 = gimple_assign_rhs2 (stmt);
1139 tree vect_type = TREE_TYPE (vec0);
1140 tree mask_type = TREE_TYPE (mask);
1141 tree vect_elt_type = TREE_TYPE (vect_type);
1142 tree mask_elt_type = TREE_TYPE (mask_type);
1143 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1144 vec<constructor_elt, va_gc> *v;
1145 tree constr, t, si, i_val;
1146 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1147 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1148 location_t loc = gimple_location (gsi_stmt (*gsi));
1149 unsigned i;
1151 if (TREE_CODE (mask) == SSA_NAME)
1153 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1154 if (is_gimple_assign (def_stmt)
1155 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1156 mask = gimple_assign_rhs1 (def_stmt);
1159 if (TREE_CODE (mask) == VECTOR_CST)
1161 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1163 for (i = 0; i < elements; ++i)
1164 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1165 & (2 * elements - 1));
1167 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1169 gimple_assign_set_rhs3 (stmt, mask);
1170 update_stmt (stmt);
1171 return;
1174 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1175 return;
1177 warning_at (loc, OPT_Wvector_operation_performance,
1178 "vector shuffling operation will be expanded piecewise");
1180 vec_alloc (v, elements);
1181 for (i = 0; i < elements; i++)
1183 si = size_int (i);
1184 i_val = vector_element (gsi, mask, si, &masktmp);
1186 if (TREE_CODE (i_val) == INTEGER_CST)
1188 unsigned HOST_WIDE_INT index;
1190 index = TREE_INT_CST_LOW (i_val);
1191 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1192 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1194 if (two_operand_p && (index & elements) != 0)
1195 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1196 else
1197 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1199 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1200 true, GSI_SAME_STMT);
1202 else
1204 tree cond = NULL_TREE, v0_val;
1206 if (two_operand_p)
1208 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1209 build_int_cst (mask_elt_type, elements));
1210 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1211 true, GSI_SAME_STMT);
1214 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1215 build_int_cst (mask_elt_type, elements - 1));
1216 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1217 true, GSI_SAME_STMT);
1219 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1220 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1221 true, GSI_SAME_STMT);
1223 if (two_operand_p)
1225 tree v1_val;
1227 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1228 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1229 true, GSI_SAME_STMT);
1231 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1232 cond, build_zero_cst (mask_elt_type));
1233 cond = fold_build3 (COND_EXPR, vect_elt_type,
1234 cond, v0_val, v1_val);
1235 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1236 true, GSI_SAME_STMT);
1238 else
1239 t = v0_val;
1242 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1245 constr = build_constructor (vect_type, v);
1246 gimple_assign_set_rhs_from_tree (gsi, constr);
1247 update_stmt (gsi_stmt (*gsi));
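/* A rough sketch of the piecewise shuffle above for the two-operand case,
   assuming four-element integer vectors held in arrays (names are
   illustrative): the selector bit just above the index range picks between
   the two input vectors and the low bits pick the element within it, exactly
   as the conditional built above does element by element.  */
static void
vec_perm_sketch (const int v0[4], const int v1[4],
                 const unsigned int sel[4], int result[4])
{
  unsigned int i;
  for (i = 0; i < 4; i++)
    {
      unsigned int idx = sel[i] & 3;                   /* element within a vector */
      result[i] = (sel[i] & 4) ? v1[idx] : v0[idx];    /* which input vector */
    }
}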
1250 /* Process one statement. If we identify a vector operation, expand it. */
1252 static void
1253 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1255 gimple stmt = gsi_stmt (*gsi);
1256 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
1257 enum tree_code code;
1258 enum machine_mode compute_mode;
1259 optab op = unknown_optab;
1260 enum gimple_rhs_class rhs_class;
1261 tree new_rhs;
1263 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1264 return;
1266 code = gimple_assign_rhs_code (stmt);
1267 rhs_class = get_gimple_rhs_class (code);
1268 lhs = gimple_assign_lhs (stmt);
1270 if (code == VEC_PERM_EXPR)
1272 lower_vec_perm (gsi);
1273 return;
1276 if (code == VEC_COND_EXPR)
1278 expand_vector_condition (gsi);
1279 return;
1281 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1282 return;
1284 rhs1 = gimple_assign_rhs1 (stmt);
1285 type = gimple_expr_type (stmt);
1286 if (rhs_class == GIMPLE_BINARY_RHS)
1287 rhs2 = gimple_assign_rhs2 (stmt);
1289 if (TREE_CODE (type) != VECTOR_TYPE)
1290 return;
1292 if (code == NOP_EXPR
1293 || code == FLOAT_EXPR
1294 || code == FIX_TRUNC_EXPR
1295 || code == VIEW_CONVERT_EXPR)
1296 return;
1298 gcc_assert (code != CONVERT_EXPR);
/* The signedness is determined from the input argument.  */
1301 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1302 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1303 type = TREE_TYPE (rhs1);
/* For widening/narrowing vector operations, the relevant type is that of
   the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
   handled the same way just above.  */
1308 if (code == WIDEN_SUM_EXPR
1309 || code == VEC_WIDEN_MULT_HI_EXPR
1310 || code == VEC_WIDEN_MULT_LO_EXPR
1311 || code == VEC_WIDEN_MULT_EVEN_EXPR
1312 || code == VEC_WIDEN_MULT_ODD_EXPR
1313 || code == VEC_UNPACK_HI_EXPR
1314 || code == VEC_UNPACK_LO_EXPR
1315 || code == VEC_PACK_TRUNC_EXPR
1316 || code == VEC_PACK_SAT_EXPR
1317 || code == VEC_PACK_FIX_TRUNC_EXPR
1318 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1319 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1320 type = TREE_TYPE (rhs1);
/* Choose between a vector shift/rotate by vector and a vector
   shift/rotate by scalar.  */
1324 if (code == LSHIFT_EXPR
1325 || code == RSHIFT_EXPR
1326 || code == LROTATE_EXPR
1327 || code == RROTATE_EXPR)
1329 optab opv;
1331 /* Check whether we have vector <op> {x,x,x,x} where x
1332 could be a scalar variable or a constant. Transform
1333 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1334 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1336 tree first;
1337 gimple def_stmt;
1339 if ((TREE_CODE (rhs2) == VECTOR_CST
1340 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1341 || (TREE_CODE (rhs2) == SSA_NAME
1342 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1343 && gimple_assign_single_p (def_stmt)
1344 && (first = uniform_vector_p
1345 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1347 gimple_assign_set_rhs2 (stmt, first);
1348 update_stmt (stmt);
1349 rhs2 = first;
1353 opv = optab_for_tree_code (code, type, optab_vector);
1354 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1355 op = opv;
1356 else
1358 op = optab_for_tree_code (code, type, optab_scalar);
1360 /* The rtl expander will expand vector/scalar as vector/vector
1361 if necessary. Don't bother converting the stmt here. */
1362 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
1363 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
1364 return;
1367 else
1368 op = optab_for_tree_code (code, type, optab_default);
1370 /* Optabs will try converting a negation into a subtraction, so
1371 look for it as well. TODO: negation of floating-point vectors
1372 might be turned into an exclusive OR toggling the sign bit. */
1373 if (op == unknown_optab
1374 && code == NEGATE_EXPR
1375 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1376 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1378 /* For very wide vectors, try using a smaller vector mode. */
1379 compute_type = type;
1380 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
1382 tree vector_compute_type
1383 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1384 if (vector_compute_type != NULL_TREE
1385 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1386 < TYPE_VECTOR_SUBPARTS (compute_type))
1387 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1388 != CODE_FOR_nothing))
1389 compute_type = vector_compute_type;
1392 /* If we are breaking a BLKmode vector into smaller pieces,
1393 type_for_widest_vector_mode has already looked into the optab,
1394 so skip these checks. */
1395 if (compute_type == type)
1397 compute_mode = TYPE_MODE (compute_type);
1398 if (VECTOR_MODE_P (compute_mode))
1400 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1401 return;
1402 if (code == MULT_HIGHPART_EXPR
1403 && can_mult_highpart_p (compute_mode,
1404 TYPE_UNSIGNED (compute_type)))
1405 return;
1407 /* There is no operation in hardware, so fall back to scalars. */
1408 compute_type = TREE_TYPE (type);
1411 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
1412 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1414 /* Leave expression untouched for later expansion. */
1415 if (new_rhs == NULL_TREE)
1416 return;
1418 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1419 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1420 new_rhs);
/* NOTE: We should avoid using gimple_assign_set_rhs_from_tree.  One
   way to do it is to change expand_vector_operation and its callees to
   return a tree_code, RHS1 and RHS2 instead of a tree.  */
1425 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1426 update_stmt (gsi_stmt (*gsi));
1429 /* Use this to lower vector operations introduced by the vectorizer,
1430 if it may need the bit-twiddling tricks implemented in this file. */
1432 static bool
1433 gate_expand_vector_operations_ssa (void)
1435 return !(cfun->curr_properties & PROP_gimple_lvec);
1438 static unsigned int
1439 expand_vector_operations (void)
1441 gimple_stmt_iterator gsi;
1442 basic_block bb;
1443 bool cfg_changed = false;
1445 FOR_EACH_BB (bb)
1447 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1449 expand_vector_operations_1 (&gsi);
/* ??? If we do not clean up EH then we will ICE in
   verification.  But in reality we have created wrong code,
   as we did not properly transition EH info and edges to
   the piecewise computations.  */
1454 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1455 && gimple_purge_dead_eh_edges (bb))
1456 cfg_changed = true;
1460 return cfg_changed ? TODO_cleanup_cfg : 0;
1463 namespace {
1465 const pass_data pass_data_lower_vector =
1467 GIMPLE_PASS, /* type */
1468 "veclower", /* name */
1469 OPTGROUP_VEC, /* optinfo_flags */
1470 true, /* has_gate */
1471 true, /* has_execute */
1472 TV_NONE, /* tv_id */
1473 PROP_cfg, /* properties_required */
1474 PROP_gimple_lvec, /* properties_provided */
1475 0, /* properties_destroyed */
1476 0, /* todo_flags_start */
1477 ( TODO_update_ssa | TODO_verify_ssa
1478 | TODO_verify_stmts
1479 | TODO_verify_flow
1480 | TODO_cleanup_cfg ), /* todo_flags_finish */
1483 class pass_lower_vector : public gimple_opt_pass
1485 public:
1486 pass_lower_vector (gcc::context *ctxt)
1487 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1490 /* opt_pass methods: */
1491 bool gate () { return gate_expand_vector_operations_ssa (); }
1492 unsigned int execute () { return expand_vector_operations (); }
1494 }; // class pass_lower_vector
1496 } // anon namespace
1498 gimple_opt_pass *
1499 make_pass_lower_vector (gcc::context *ctxt)
1501 return new pass_lower_vector (ctxt);
1504 namespace {
1506 const pass_data pass_data_lower_vector_ssa =
1508 GIMPLE_PASS, /* type */
1509 "veclower2", /* name */
1510 OPTGROUP_VEC, /* optinfo_flags */
1511 false, /* has_gate */
1512 true, /* has_execute */
1513 TV_NONE, /* tv_id */
1514 PROP_cfg, /* properties_required */
1515 PROP_gimple_lvec, /* properties_provided */
1516 0, /* properties_destroyed */
1517 0, /* todo_flags_start */
1518 ( TODO_update_ssa | TODO_verify_ssa
1519 | TODO_verify_stmts
1520 | TODO_verify_flow
1521 | TODO_cleanup_cfg ), /* todo_flags_finish */
1524 class pass_lower_vector_ssa : public gimple_opt_pass
1526 public:
1527 pass_lower_vector_ssa (gcc::context *ctxt)
1528 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1531 /* opt_pass methods: */
1532 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1533 unsigned int execute () { return expand_vector_operations (); }
1535 }; // class pass_lower_vector_ssa
1537 } // anon namespace
1539 gimple_opt_pass *
1540 make_pass_lower_vector_ssa (gcc::context *ctxt)
1542 return new pass_lower_vector_ssa (ctxt);
1545 #include "gt-tree-vect-generic.h"