gcc/tree-vect-generic.c
/* Lower vector operations to scalar operations.
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tm.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
#include "diagnostic.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
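
/* A worked example of the replication trick above (illustrative only,
   assuming a 64-bit HOST_WIDE_INT): for an 8-bit INNER_TYPE, MASK is
   0xff and ~0 / 0xff is 0x0101010101010101, so multiplying by
   (VALUE & 0xff) copies that byte into every byte of the word.
   VALUE 0x80 thus yields 0x8080808080808080, truncated afterwards to
   TYPE's precision; this is how do_plus_minus below obtains its
   "high_bits" and "low_bits" masks.  */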
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}
/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements.

   The returned expression is of a signed integer type with the
   size equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
            tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
                      (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
                          fold_build2 (code, boolean_type_node, a, b),
                          build_int_cst (comp_type, -1),
                          build_int_cst (comp_type, 0));
}
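
/* For example (a sketch of the emitted GIMPLE, not a verbatim dump):
   comparing the 32-bit lanes of two V4SF operands with LT_EXPR turns
   each lane into

     BIT_FIELD_REF <a, 32, pos> < BIT_FIELD_REF <b, 32, pos> ? -1 : 0

   computed in a 32-bit integer type, so true lanes come out as
   all-ones (-1) and false lanes as zero.  */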
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           ((a ^ b) & 0x80808080)

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           ((a ^ ~b) & 0x80808080)

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
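
/* A standalone sketch (not used by the pass itself) that checks the
   byte-wise addition identity quoted above on 32-bit words holding four
   8-bit lanes: the low 7 bits of each lane are added without any carry
   crossing a lane boundary, and the lane sign bits are patched back in
   with XOR.  Kept under #if 0 so it is never compiled here.  */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t
swar_add8 (uint32_t a, uint32_t b)
{
  return ((a & 0x7f7f7f7fu) + (b & 0x7f7f7f7fu)) ^ ((a ^ b) & 0x80808080u);
}

int
main (void)
{
  /* Lane-wise: 0x00+0x00 = 0x00, 0xc3+0x45 = 0x08 (mod 256),
     0x00+0x00 = 0x00, 0xff+0x01 = 0x00 (mod 256).  */
  assert (swar_add8 (0x00c300ff, 0x00450001) == 0x00080000u);
  return 0;
}
#endif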
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  VEC(constructor_elt,gc) *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded in parallel");

  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
      ce->index = NULL_TREE;
      ce->value = result;
    }

  return build_constructor (type, v);
}
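
/* As an illustration (a sketch, not a verbatim GIMPLE dump): lowering a
   V4SI addition this way with F == do_binop produces, per lane I, two
   BIT_FIELD_REF extracts of 32 bits at bit position 32*I, one scalar
   PLUS_EXPR, and finally a CONSTRUCTOR of the original vector type
   collecting the four scalar results.  */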
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
                        tree a, tree b,
                        enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
                  "vector operation will be expanded with a "
                  "single scalar operation");
    }

  return result;
}
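
/* For instance (illustrative, assuming a 64-bit target that lacks the
   corresponding vector instruction): a bitwise AND on a 16-byte V16QI
   falls into the "word at a time" case and becomes two 64-bit scalar
   ANDs, while the same operation on a 4-byte vector falls into the
   last case and becomes a single SImode AND.  */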
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}
/* Check whether vector VEC consists of all equal elements and whether
   the number of elements corresponds to the type of VEC.
   The function returns the first element of the vector
   or NULL_TREE if the vector is not uniform.  */
static tree
uniform_vector_p (tree vec)
{
  tree first, t, els;
  unsigned i;

  if (vec == NULL_TREE)
    return NULL_TREE;

  if (TREE_CODE (vec) == VECTOR_CST)
    {
      els = TREE_VECTOR_CST_ELTS (vec);
      first = TREE_VALUE (els);
      els = TREE_CHAIN (els);

      for (t = els; t; t = TREE_CHAIN (t))
        if (!operand_equal_p (first, TREE_VALUE (t), 0))
          return NULL_TREE;

      return first;
    }

  else if (TREE_CODE (vec) == CONSTRUCTOR)
    {
      first = error_mark_node;

      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
        {
          if (i == 0)
            {
              first = t;
              continue;
            }
          if (!operand_equal_p (first, t, 0))
            return NULL_TREE;
        }
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
        return NULL_TREE;

      return first;
    }

  return NULL_TREE;
}
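
/* For example: the constant {3, 3, 3, 3} is uniform and the element 3
   is returned, whereas {0, 1, 2, 3} is not and yields NULL_TREE; a
   CONSTRUCTOR that initializes fewer elements than its vector type
   holds is likewise rejected even if the elements it does provide are
   all equal.  */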
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
        VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
                                 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                         gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
                                         gimple_assign_rhs1 (assign),
                                         gimple_assign_rhs2 (assign), code);
        break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
                                         gimple_assign_rhs1 (assign),
                                         NULL_TREE, code);
        break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
                                       gimple_assign_rhs1 (assign),
                                       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
                                       gimple_assign_rhs1 (assign),
                                       NULL_TREE, code);

      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);

          return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
        }

      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
}
/* Return a type for the widest vector mode whose components are of mode
   INNER_MODE, or NULL_TREE if none is found.
   SATP is true for saturating fixed-point types.  */

static tree
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp)
{
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    {
      /* For fixed-point modes, we need to pass satp as the 2nd parameter.  */
      if (ALL_FIXED_POINT_MODE_P (best_mode))
        return lang_hooks.types.type_for_mode (best_mode, satp);

      return lang_hooks.types.type_for_mode (best_mode, 1);
    }
}
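
/* For example (hypothetical target): with INNER_MODE == SImode and OP
   the addition optab, a target that implements both V4SI and V8SI
   addition makes this return the V8SI type, since the loop keeps the
   supported mode with the largest number of SImode lanes.  */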
/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case PTMPVEC is NULL a new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
         we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!host_integerp (idx, 1) || index >= elements)
        {
          index &= elements - 1;
          idx = build_int_cst (TREE_TYPE (idx), index);
        }

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (vect);
          if (is_gimple_assign (def_stmt)
              && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
                  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
            vect = gimple_assign_rhs1 (def_stmt);
        }

      if (TREE_CODE (vect) == VECTOR_CST)
        {
          unsigned i;
          tree vals = TREE_VECTOR_CST_ELTS (vect);
          for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
            if (i == index)
              return TREE_VALUE (vals);
          return build_zero_cst (vect_elt_type);
        }
      else if (TREE_CODE (vect) == CONSTRUCTOR)
        {
          unsigned i;
          tree elt_i, elt_v;

          FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
            if (operand_equal_p (elt_i, idx, 0))
              return elt_v;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
          tree size = TYPE_SIZE (vect_elt_type);
          tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx), idx, size);
          return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}
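
/* Two illustrative cases (sketches of the result, not verbatim dumps):
   a constant IDX of 2 applied to the VECTOR_CST {10, 20, 30, 40} simply
   returns the element 30, whereas a variable IDX stores the vector into
   an addressable temporary "vectmp" and yields roughly
   VIEW_CONVERT_EXPR<elt[4]>(vectmp)[idx], i.e. an ARRAY_REF into the
   vector reinterpreted as an array.  */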
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands,
   VEC_PERM_EXPR <v0, v0, mask>, the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  VEC(constructor_elt,gc) *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
      tree vals = TREE_VECTOR_CST_ELTS (mask);

      for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
        sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)) & (2 * elements - 1);

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
        return;
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  v = VEC_alloc (constructor_elt, gc, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
          unsigned HOST_WIDE_INT index;

          index = TREE_INT_CST_LOW (i_val);
          if (!host_integerp (i_val, 1) || index >= elements)
            i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
          else
            t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
        }
      else
        {
          tree cond = NULL_TREE, v0_val;

          if (two_operand_p)
            {
              cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                                  build_int_cst (mask_elt_type, elements));
              cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                               true, GSI_SAME_STMT);
            }

          i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                               build_int_cst (mask_elt_type, elements - 1));
          i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
                                            true, GSI_SAME_STMT);

          v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
          v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

          if (two_operand_p)
            {
              tree v1_val;

              v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
              v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
                                                 true, GSI_SAME_STMT);

              cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                  cond, build_zero_cst (mask_elt_type));
              cond = fold_build3 (COND_EXPR, vect_elt_type,
                                  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                            true, GSI_SAME_STMT);
            }
          else
            t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, si, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
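
/* A concrete example of the piecewise lowering (a sketch): with four
   elements per vector, VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,4,1,5}>
   builds the CONSTRUCTOR {a, e, b, f}; a mask value with bit 2 set
   (i.e. >= 4) selects from the second vector, and the value masked with
   elements - 1 gives the lane index within the selected vector.  */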
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op = NULL;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
               && (first = uniform_vector_p (rhs2)) != NULL_TREE)
              || (TREE_CODE (rhs2) == SSA_NAME
                  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
                  && gimple_assign_single_p (def_stmt)
                  && (first = uniform_vector_p
                      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        op = opv;
      else
        {
          op = optab_for_tree_code (code, type, optab_scalar);

          /* The rtl expander will expand vector/scalar as vector/vector
             if necessary.  Don't bother converting the stmt here.  */
          if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
              && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
            return;
        }
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* For widening/narrowing vector operations, the relevant type is that of
     the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
                                       TYPE_SATURATING (TREE_TYPE (type)));
      if (vector_compute_type != NULL_TREE
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
              < TYPE_VECTOR_SUBPARTS (compute_type))
          && (optab_handler (op, TYPE_MODE (vector_compute_type))
              != CODE_FOR_nothing))
        compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode)
          && op != NULL
          && optab_handler (op, compute_mode) != CODE_FOR_nothing)
        return;
      else
        /* There is no operation in hardware, so fall back to scalars.  */
        compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static bool
gate_expand_vector_operations_ssa (void)
{
  return optimize == 0;
}

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          expand_vector_operations_1 (&gsi);
          /* ???  If we do not cleanup EH then we will ICE in
             verification.  But in reality we have created wrong-code
             as we did not properly transition EH info and edges to
             the piecewise computations.  */
          if (maybe_clean_eh_stmt (gsi_stmt (gsi))
              && gimple_purge_dead_eh_edges (bb))
            cfg_changed = true;
        }
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",                           /* name */
  gate_expand_vector_operations_ssa,    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",                          /* name */
  0,                                    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

#include "gt-tree-vect-generic.h"