* sh.c (find_sole_member): New function.
[official-gcc.git] / gcc / tree-vect-generic.c
blobf9c9fda31047271902754e05f3f5fd4585984106
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
38 #include "ggc.h"
41 /* Build a constant of type TYPE, made of VALUE's bits replicated
42 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
43 static tree
44 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
46 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
47 int n = HOST_BITS_PER_WIDE_INT / width;
48 unsigned HOST_WIDE_INT low, high, mask;
49 tree ret;
51 gcc_assert (n);
53 if (width == HOST_BITS_PER_WIDE_INT)
54 low = value;
55 else
57 mask = ((HOST_WIDE_INT)1 << width) - 1;
58 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
61 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
62 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
63 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
64 high = 0;
65 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
66 high = low;
67 else
68 gcc_unreachable ();
70 ret = build_int_cst_wide (type, low, high);
71 return ret;
74 static GTY(()) tree vector_inner_type;
75 static GTY(()) tree vector_last_type;
76 static GTY(()) int vector_last_nunits;
78 /* Return a suitable vector types made of SUBPARTS units each of mode
79 "word_mode" (the global variable). */
80 static tree
81 build_word_mode_vector_type (int nunits)
83 if (!vector_inner_type)
84 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
85 else if (vector_last_nunits == nunits)
87 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
88 return vector_last_type;
91 /* We build a new type, but we canonicalize it nevertheless,
92 because it still saves some memory. */
93 vector_last_nunits = nunits;
94 vector_last_type = type_hash_canon (nunits,
95 build_vector_type (vector_inner_type,
96 nunits));
97 return vector_last_type;
100 typedef tree (*elem_op_func) (block_stmt_iterator *,
101 tree, tree, tree, tree, tree, enum tree_code);
103 static inline tree
104 tree_vec_extract (block_stmt_iterator *bsi, tree type,
105 tree t, tree bitsize, tree bitpos)
107 if (bitpos)
108 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
110 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
111 anyway be stored in memory, so prefer NOP_EXPR. */
112 else if (TYPE_MODE (type) == BLKmode)
113 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
114 else
115 return gimplify_build1 (bsi, NOP_EXPR, type, t);
118 static tree
119 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
120 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
121 enum tree_code code)
123 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
124 return gimplify_build1 (bsi, code, inner_type, a);
127 static tree
128 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
129 tree bitpos, tree bitsize, enum tree_code code)
131 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
132 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
133 return gimplify_build2 (bsi, code, inner_type, a, b);
136 /* Expand vector addition to scalars. This does bit twiddling
137 in order to increase parallelism:
139 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
140 (a ^ b) & 0x80808080
142 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
143 (a ^ ~b) & 0x80808080
145 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
147 This optimization should be done only if 4 vector items or more
148 fit into a word. */
149 static tree
150 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
151 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
152 enum tree_code code)
154 tree inner_type = TREE_TYPE (TREE_TYPE (a));
155 unsigned HOST_WIDE_INT max;
156 tree low_bits, high_bits, a_low, b_low, result_low, signs;
158 max = GET_MODE_MASK (TYPE_MODE (inner_type));
159 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
160 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
162 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
163 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
165 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
166 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
167 if (code == PLUS_EXPR)
168 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
169 else
171 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
172 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
175 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
176 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
177 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
180 static tree
181 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
182 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
183 tree bitsize ATTRIBUTE_UNUSED,
184 enum tree_code code ATTRIBUTE_UNUSED)
186 tree inner_type = TREE_TYPE (TREE_TYPE (b));
187 HOST_WIDE_INT max;
188 tree low_bits, high_bits, b_low, result_low, signs;
190 max = GET_MODE_MASK (TYPE_MODE (inner_type));
191 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
192 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
194 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
196 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
197 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
198 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
199 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
200 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
203 /* Expand a vector operation to scalars, by using many operations
204 whose type is the vector type's inner type. */
205 static tree
206 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
207 tree type, tree inner_type,
208 tree a, tree b, enum tree_code code)
210 VEC(constructor_elt,gc) *v;
211 tree part_width = TYPE_SIZE (inner_type);
212 tree index = bitsize_int (0);
213 int nunits = TYPE_VECTOR_SUBPARTS (type);
214 int delta = tree_low_cst (part_width, 1)
215 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
216 int i;
218 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
219 for (i = 0; i < nunits;
220 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
222 tree result = f (bsi, inner_type, a, b, index, part_width, code);
223 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
224 ce->index = NULL_TREE;
225 ce->value = result;
228 return build_constructor (type, v);
231 /* Expand a vector operation to scalars with the freedom to use
232 a scalar integer type, or to use a different size for the items
233 in the vector type. */
234 static tree
235 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
236 tree a, tree b,
237 enum tree_code code)
239 tree result, compute_type;
240 enum machine_mode mode;
241 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
243 /* We have three strategies. If the type is already correct, just do
244 the operation an element at a time. Else, if the vector is wider than
245 one word, do it a word at a time; finally, if the vector is smaller
246 than one word, do it as a scalar. */
247 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
248 return expand_vector_piecewise (bsi, f,
249 type, TREE_TYPE (type),
250 a, b, code);
251 else if (n_words > 1)
253 tree word_type = build_word_mode_vector_type (n_words);
254 result = expand_vector_piecewise (bsi, f,
255 word_type, TREE_TYPE (word_type),
256 a, b, code);
257 result = gimplify_val (bsi, word_type, result);
259 else
261 /* Use a single scalar operation with a mode no wider than word_mode. */
262 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
263 compute_type = lang_hooks.types.type_for_mode (mode, 1);
264 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
267 return result;
270 /* Expand a vector operation to scalars; for integer types we can use
271 special bit twiddling tricks to do the sums a word at a time, using
272 function F_PARALLEL instead of F. These tricks are done only if
273 they can process at least four items, that is, only if the vector
274 holds at least four items and if a word can hold four items. */
275 static tree
276 expand_vector_addition (block_stmt_iterator *bsi,
277 elem_op_func f, elem_op_func f_parallel,
278 tree type, tree a, tree b, enum tree_code code)
280 int parts_per_word = UNITS_PER_WORD
281 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
283 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
284 && parts_per_word >= 4
285 && TYPE_VECTOR_SUBPARTS (type) >= 4)
286 return expand_vector_parallel (bsi, f_parallel,
287 type, a, b, code);
288 else
289 return expand_vector_piecewise (bsi, f,
290 type, TREE_TYPE (type),
291 a, b, code);
294 static tree
295 expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
296 tree rhs, enum tree_code code)
298 enum machine_mode compute_mode = TYPE_MODE (compute_type);
300 /* If the compute mode is not a vector mode (hence we are not decomposing
301 a BLKmode vector to smaller, hardware-supported vectors), we may want
302 to expand the operations in parallel. */
303 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
304 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
305 switch (code)
307 case PLUS_EXPR:
308 case MINUS_EXPR:
309 if (!TYPE_TRAP_SIGNED (type))
310 return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
311 TREE_OPERAND (rhs, 0),
312 TREE_OPERAND (rhs, 1), code);
313 break;
315 case NEGATE_EXPR:
316 if (!TYPE_TRAP_SIGNED (type))
317 return expand_vector_addition (bsi, do_unop, do_negate, type,
318 TREE_OPERAND (rhs, 0),
319 NULL_TREE, code);
320 break;
322 case BIT_AND_EXPR:
323 case BIT_IOR_EXPR:
324 case BIT_XOR_EXPR:
325 return expand_vector_parallel (bsi, do_binop, type,
326 TREE_OPERAND (rhs, 0),
327 TREE_OPERAND (rhs, 1), code);
329 case BIT_NOT_EXPR:
330 return expand_vector_parallel (bsi, do_unop, type,
331 TREE_OPERAND (rhs, 0),
332 NULL_TREE, code);
334 default:
335 break;
338 if (TREE_CODE_CLASS (code) == tcc_unary)
339 return expand_vector_piecewise (bsi, do_unop, type, compute_type,
340 TREE_OPERAND (rhs, 0),
341 NULL_TREE, code);
342 else
343 return expand_vector_piecewise (bsi, do_binop, type, compute_type,
344 TREE_OPERAND (rhs, 0),
345 TREE_OPERAND (rhs, 1), code);
348 /* Return a type for the widest vector mode whose components are of mode
349 INNER_MODE, or NULL_TREE if none is found. */
350 static tree
351 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
353 enum machine_mode best_mode = VOIDmode, mode;
354 int best_nunits = 0;
356 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
357 mode = MIN_MODE_VECTOR_FLOAT;
358 else
359 mode = MIN_MODE_VECTOR_INT;
361 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
362 if (GET_MODE_INNER (mode) == inner_mode
363 && GET_MODE_NUNITS (mode) > best_nunits
364 && op->handlers[mode].insn_code != CODE_FOR_nothing)
365 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
367 if (best_mode == VOIDmode)
368 return NULL_TREE;
369 else
370 return lang_hooks.types.type_for_mode (best_mode, 1);
373 /* Process one statement. If we identify a vector operation, expand it. */
375 static void
376 expand_vector_operations_1 (block_stmt_iterator *bsi)
378 tree stmt = bsi_stmt (*bsi);
379 tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
380 enum tree_code code;
381 enum machine_mode compute_mode;
382 optab op;
384 switch (TREE_CODE (stmt))
386 case RETURN_EXPR:
387 stmt = TREE_OPERAND (stmt, 0);
388 if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
389 return;
391 /* FALLTHRU */
393 case MODIFY_EXPR:
394 p_lhs = &TREE_OPERAND (stmt, 0);
395 p_rhs = &TREE_OPERAND (stmt, 1);
396 lhs = *p_lhs;
397 rhs = *p_rhs;
398 break;
400 default:
401 return;
404 type = TREE_TYPE (rhs);
405 if (TREE_CODE (type) != VECTOR_TYPE)
406 return;
408 code = TREE_CODE (rhs);
409 if (TREE_CODE_CLASS (code) != tcc_unary
410 && TREE_CODE_CLASS (code) != tcc_binary)
411 return;
413 if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
414 return;
416 gcc_assert (code != CONVERT_EXPR);
417 op = optab_for_tree_code (code, type);
419 /* Optabs will try converting a negation into a subtraction, so
420 look for it as well. TODO: negation of floating-point vectors
421 might be turned into an exclusive OR toggling the sign bit. */
422 if (op == NULL
423 && code == NEGATE_EXPR
424 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
425 op = optab_for_tree_code (MINUS_EXPR, type);
427 /* For very wide vectors, try using a smaller vector mode. */
428 compute_type = type;
429 if (TYPE_MODE (type) == BLKmode && op)
431 tree vector_compute_type
432 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
433 if (vector_compute_type != NULL_TREE)
434 compute_type = vector_compute_type;
437 /* If we are breaking a BLKmode vector into smaller pieces,
438 type_for_widest_vector_mode has already looked into the optab,
439 so skip these checks. */
440 if (compute_type == type)
442 compute_mode = TYPE_MODE (compute_type);
443 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
444 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
445 && op != NULL
446 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
447 return;
448 else
449 /* There is no operation in hardware, so fall back to scalars. */
450 compute_type = TREE_TYPE (type);
453 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
454 rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
455 if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
456 *p_rhs = rhs;
457 else
459 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
460 be stored in memory anyway, so prefer NOP_EXPR. We should also try
461 performing the VIEW_CONVERT_EXPR on the left side of the
462 assignment. */
463 if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode)
464 *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs);
465 else
466 *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs);
469 mark_stmt_modified (bsi_stmt (*bsi));
472 /* Use this to lower vector operations introduced by the vectorizer,
473 if it may need the bit-twiddling tricks implemented in this file. */
475 static bool
476 gate_expand_vector_operations (void)
478 return flag_tree_vectorize != 0;
481 static void
482 expand_vector_operations (void)
484 block_stmt_iterator bsi;
485 basic_block bb;
487 FOR_EACH_BB (bb)
489 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
491 expand_vector_operations_1 (&bsi);
492 update_stmt_if_modified (bsi_stmt (bsi));
497 struct tree_opt_pass pass_lower_vector =
499 "veclower", /* name */
500 0, /* gate */
501 expand_vector_operations, /* execute */
502 NULL, /* sub */
503 NULL, /* next */
504 0, /* static_pass_number */
505 0, /* tv_id */
506 PROP_cfg, /* properties_required */
507 0, /* properties_provided */
508 0, /* properties_destroyed */
509 0, /* todo_flags_start */
510 TODO_dump_func | TODO_ggc_collect
511 | TODO_verify_stmts, /* todo_flags_finish */
512 0 /* letter */
515 struct tree_opt_pass pass_lower_vector_ssa =
517 "veclower2", /* name */
518 gate_expand_vector_operations, /* gate */
519 expand_vector_operations, /* execute */
520 NULL, /* sub */
521 NULL, /* next */
522 0, /* static_pass_number */
523 0, /* tv_id */
524 PROP_cfg, /* properties_required */
525 0, /* properties_provided */
526 0, /* properties_destroyed */
527 0, /* todo_flags_start */
528 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */
529 | TODO_verify_ssa
530 | TODO_verify_stmts | TODO_verify_flow,
531 0 /* letter */
534 #include "gt-tree-vect-generic.h"