/* Lower vector operations to scalar operations.
   Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "insn-codes.h"
28 #include "diagnostic.h"
29 #include "optabs.h"
30 #include "machmode.h"
31 #include "langhooks.h"
32 #include "tree-flow.h"
33 #include "gimple.h"
34 #include "tree-iterator.h"
35 #include "tree-pass.h"
36 #include "flags.h"
37 #include "ggc.h"
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
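
/* Editor's illustration, not part of GCC: a minimal sketch of the
   "~0 / mask * value" replication trick used above, specialized to
   broadcasting one byte across a 32-bit word.  The function name is
   hypothetical.  0xffffffff / 0xff == 0x01010101, so the multiply
   copies VALUE into every byte lane, e.g. 0x7f -> 0x7f7f7f7f.  */
static inline unsigned int
replicate_byte_example (unsigned char value)
{
  return 0xffffffffu / 0xffu * value;
}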
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  (A self-contained illustration of these
   identities follows do_negate below.)  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
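
/* Editor's illustration, not part of GCC: the bit-twiddling identities
   above written out for four 8-bit lanes packed in a 32-bit word
   (the function names are hypothetical).  Each lane wraps modulo 256,
   exactly as a per-element V4QI add or negate would.  */
static inline unsigned int
swar_add_v4qi_example (unsigned int a, unsigned int b)
{
  /* Sum the low 7 bits of each lane; the carry cannot cross lane
     boundaries because bit 7 of both operands is masked off.  */
  unsigned int low = (a & 0x7f7f7f7fu) + (b & 0x7f7f7f7fu);
  /* Recompute each lane's top bit as a7 ^ b7 ^ carry-in.  */
  return low ^ ((a ^ b) & 0x80808080u);
}

static inline unsigned int
swar_neg_v4qi_example (unsigned int b)
{
  /* 0x80 - low7 never borrows across lanes, so lanes stay independent.  */
  unsigned int low = 0x80808080u - (b & 0x7f7f7f7fu);
  return low ^ (~b & 0x80808080u);
}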
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  VEC(constructor_elt,gc) *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;

  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
      ce->index = NULL_TREE;
      ce->value = result;
    }

  return build_constructor (type, v);
}
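
/* Editor's illustration, not part of GCC: in C terms, the piecewise
   strategy amounts to one inner-type operation per element, gathered
   back into an aggregate, as in this hypothetical V4SF negation.  */
struct v4sf_example { float e[4]; };

static inline struct v4sf_example
neg_v4sf_piecewise_example (struct v4sf_example a)
{
  struct v4sf_example r;
  int i;
  for (i = 0; i < 4; i++)
    r.e[i] = -a.e[i];   /* one scalar op per vector element */
  return r;
}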
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
                        tree a, tree b,
                        enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
    }

  return result;
}
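
/* Editor's illustration, not part of GCC: what the word-at-a-time
   branch above amounts to for a bitwise AND of two 16-byte vectors on
   a hypothetical 32-bit target (UNITS_PER_WORD == 4) -- four word-sized
   ANDs, valid because bitwise operations never cross lane boundaries.  */
static inline void
and_v16qi_wordwise_example (unsigned int dst[4],
                            const unsigned int a[4],
                            const unsigned int b[4])
{
  int i;
  for (i = 0; i < 4; i++)
    dst[i] = a[i] & b[i];
}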
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                         gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
                                         gimple_assign_rhs1 (assign),
                                         gimple_assign_rhs2 (assign), code);
        break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
                                         gimple_assign_rhs1 (assign),
                                         NULL_TREE, code);
        break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
                                       gimple_assign_rhs1 (assign),
                                       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
                                       gimple_assign_rhs1 (assign),
                                       NULL_TREE, code);
      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
}
/* Return a type for the widest vector mode whose components are of mode
   INNER_MODE, or NULL_TREE if none is found.
   SATP is true for saturating fixed-point types.  */

static tree
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp)
{
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && optab_handler (op, mode)->insn_code != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    {
      /* For fixed-point modes, we need to pass satp as the 2nd parameter.  */
      if (ALL_FIXED_POINT_MODE_P (best_mode))
        return lang_hooks.types.type_for_mode (best_mode, satp);

      return lang_hooks.types.type_for_mode (best_mode, 1);
    }
}
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  lhs = gimple_assign_lhs (stmt);
  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      /* If the 2nd argument is a vector, we need a vector/vector shift.  */
      if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
        op = optab_for_tree_code (code, type, optab_vector);
      else
        {
          /* Try for a vector/scalar shift, and if we don't have one, see
             if we have a vector/vector shift.  */
          op = optab_for_tree_code (code, type, optab_scalar);
          if (!op
              || (op->handlers[(int) TYPE_MODE (type)].insn_code
                  == CODE_FOR_nothing))
            op = optab_for_tree_code (code, type, optab_vector);
        }
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* For widening/narrowing vector operations, the relevant type is that of
     the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     handled the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR)
    type = TREE_TYPE (rhs1);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (TYPE_MODE (type) == BLKmode && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
                                       TYPE_SATURATING (TREE_TYPE (type)));
      if (vector_compute_type != NULL_TREE
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
              < TYPE_VECTOR_SUBPARTS (compute_type)))
        compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FRACT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UFRACT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_ACCUM
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UACCUM)
          && op != NULL
          && optab_handler (op, compute_mode)->insn_code != CODE_FOR_nothing)
        return;
      else
        /* There is no operation in hardware, so fall back to scalars.  */
        compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  gimple_set_modified (gsi_stmt (*gsi), true);
}
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static bool
gate_expand_vector_operations (void)
{
  return flag_tree_vectorize != 0;
}

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          expand_vector_operations_1 (&gsi);
          update_stmt_if_modified (gsi_stmt (gsi));
        }
    }
  return 0;
}
struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",                           /* name */
  0,                                    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  0,                                    /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect
    | TODO_verify_stmts                 /* todo_flags_finish */
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",                          /* name */
  gate_expand_vector_operations,        /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  0,                                    /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func | TODO_update_ssa      /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
 }
};

#include "gt-tree-vect-generic.h"