* defaults.h (FRAME_GROWS_DOWNWARD): Define to 0 if not defined.
[official-gcc.git] / gcc / tree-vect-generic.c
blob015a727de3a66e4a0051919c2c595c9641b5a935
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
38 #include "ggc.h"
41 /* Build a constant of type TYPE, made of VALUE's bits replicated
42 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
43 static tree
44 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
46 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
47 int n = HOST_BITS_PER_WIDE_INT / width;
48 unsigned HOST_WIDE_INT low, high, mask;
49 tree ret;
51 gcc_assert (n);
53 if (width == HOST_BITS_PER_WIDE_INT)
54 low = value;
55 else
57 mask = ((HOST_WIDE_INT)1 << width) - 1;
58 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
61 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
62 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
63 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
64 high = 0;
65 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
66 high = low;
67 else
68 gcc_unreachable ();
70 ret = build_int_cst_wide (type, low, high);
71 return ret;
74 static GTY(()) tree vector_inner_type;
75 static GTY(()) tree vector_last_type;
76 static GTY(()) int vector_last_nunits;
78 /* Return a suitable vector types made of SUBPARTS units each of mode
79 "word_mode" (the global variable). */
80 static tree
81 build_word_mode_vector_type (int nunits)
83 if (!vector_inner_type)
84 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
85 else if (vector_last_nunits == nunits)
87 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
88 return vector_last_type;
91 /* We build a new type, but we canonicalize it nevertheless,
92 because it still saves some memory. */
93 vector_last_nunits = nunits;
94 vector_last_type = type_hash_canon (nunits,
95 build_vector_type (vector_inner_type,
96 nunits));
97 return vector_last_type;
100 typedef tree (*elem_op_func) (block_stmt_iterator *,
101 tree, tree, tree, tree, tree, enum tree_code);
103 static inline tree
104 tree_vec_extract (block_stmt_iterator *bsi, tree type,
105 tree t, tree bitsize, tree bitpos)
107 if (bitpos)
108 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
110 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
111 anyway be stored in memory, so prefer NOP_EXPR. */
112 else if (TYPE_MODE (type) == BLKmode)
113 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
114 else
115 return gimplify_build1 (bsi, NOP_EXPR, type, t);
118 static tree
119 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
120 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
121 enum tree_code code)
123 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
124 return gimplify_build1 (bsi, code, inner_type, a);
127 static tree
128 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
129 tree bitpos, tree bitsize, enum tree_code code)
131 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
132 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
133 return gimplify_build2 (bsi, code, inner_type, a, b);
136 /* Expand vector addition to scalars. This does bit twiddling
137 in order to increase parallelism:
139 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
140 (a ^ b) & 0x80808080
142 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
143 (a ^ ~b) & 0x80808080
145 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
147 This optimization should be done only if 4 vector items or more
148 fit into a word. */
149 static tree
150 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
151 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
152 enum tree_code code)
154 tree inner_type = TREE_TYPE (TREE_TYPE (a));
155 unsigned HOST_WIDE_INT max;
156 tree low_bits, high_bits, a_low, b_low, result_low, signs;
158 max = GET_MODE_MASK (TYPE_MODE (inner_type));
159 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
160 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
162 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
163 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
165 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
166 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
167 if (code == PLUS_EXPR)
168 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
169 else
171 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
172 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
175 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
176 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
177 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
180 static tree
181 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
182 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
183 tree bitsize ATTRIBUTE_UNUSED,
184 enum tree_code code ATTRIBUTE_UNUSED)
186 tree inner_type = TREE_TYPE (TREE_TYPE (b));
187 HOST_WIDE_INT max;
188 tree low_bits, high_bits, b_low, result_low, signs;
190 max = GET_MODE_MASK (TYPE_MODE (inner_type));
191 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
192 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
194 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
196 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
197 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
198 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
199 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
200 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
203 /* Expand a vector operation to scalars, by using many operations
204 whose type is the vector type's inner type. */
205 static tree
206 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
207 tree type, tree inner_type,
208 tree a, tree b, enum tree_code code)
210 tree head, *chain = &head;
211 tree part_width = TYPE_SIZE (inner_type);
212 tree index = bitsize_int (0);
213 int nunits = TYPE_VECTOR_SUBPARTS (type);
214 int delta = tree_low_cst (part_width, 1)
215 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
216 int i;
218 for (i = 0; i < nunits;
219 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
221 tree result = f (bsi, inner_type, a, b, index, part_width, code);
222 *chain = tree_cons (NULL_TREE, result, NULL_TREE);
223 chain = &TREE_CHAIN (*chain);
226 return build1 (CONSTRUCTOR, type, head);
229 /* Expand a vector operation to scalars with the freedom to use
230 a scalar integer type, or to use a different size for the items
231 in the vector type. */
232 static tree
233 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
234 tree a, tree b,
235 enum tree_code code)
237 tree result, compute_type;
238 enum machine_mode mode;
239 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
241 /* We have three strategies. If the type is already correct, just do
242 the operation an element at a time. Else, if the vector is wider than
243 one word, do it a word at a time; finally, if the vector is smaller
244 than one word, do it as a scalar. */
245 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
246 return expand_vector_piecewise (bsi, f,
247 type, TREE_TYPE (type),
248 a, b, code);
249 else if (n_words > 1)
251 tree word_type = build_word_mode_vector_type (n_words);
252 result = expand_vector_piecewise (bsi, f,
253 word_type, TREE_TYPE (word_type),
254 a, b, code);
255 result = gimplify_val (bsi, word_type, result);
257 else
259 /* Use a single scalar operation with a mode no wider than word_mode. */
260 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
261 compute_type = lang_hooks.types.type_for_mode (mode, 1);
262 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
265 return result;
268 /* Expand a vector operation to scalars; for integer types we can use
269 special bit twiddling tricks to do the sums a word at a time, using
270 function F_PARALLEL instead of F. These tricks are done only if
271 they can process at least four items, that is, only if the vector
272 holds at least four items and if a word can hold four items. */
273 static tree
274 expand_vector_addition (block_stmt_iterator *bsi,
275 elem_op_func f, elem_op_func f_parallel,
276 tree type, tree a, tree b, enum tree_code code)
278 int parts_per_word = UNITS_PER_WORD
279 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
281 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
282 && parts_per_word >= 4
283 && TYPE_VECTOR_SUBPARTS (type) >= 4)
284 return expand_vector_parallel (bsi, f_parallel,
285 type, a, b, code);
286 else
287 return expand_vector_piecewise (bsi, f,
288 type, TREE_TYPE (type),
289 a, b, code);
292 static tree
293 expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
294 tree rhs, enum tree_code code)
296 enum machine_mode compute_mode = TYPE_MODE (compute_type);
298 /* If the compute mode is not a vector mode (hence we are not decomposing
299 a BLKmode vector to smaller, hardware-supported vectors), we may want
300 to expand the operations in parallel. */
301 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
302 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
303 switch (code)
305 case PLUS_EXPR:
306 case MINUS_EXPR:
307 if (!TYPE_TRAP_SIGNED (type))
308 return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
309 TREE_OPERAND (rhs, 0),
310 TREE_OPERAND (rhs, 1), code);
311 break;
313 case NEGATE_EXPR:
314 if (!TYPE_TRAP_SIGNED (type))
315 return expand_vector_addition (bsi, do_unop, do_negate, type,
316 TREE_OPERAND (rhs, 0),
317 NULL_TREE, code);
318 break;
320 case BIT_AND_EXPR:
321 case BIT_IOR_EXPR:
322 case BIT_XOR_EXPR:
323 return expand_vector_parallel (bsi, do_binop, type,
324 TREE_OPERAND (rhs, 0),
325 TREE_OPERAND (rhs, 1), code);
327 case BIT_NOT_EXPR:
328 return expand_vector_parallel (bsi, do_unop, type,
329 TREE_OPERAND (rhs, 0),
330 NULL_TREE, code);
332 default:
333 break;
336 if (TREE_CODE_CLASS (code) == tcc_unary)
337 return expand_vector_piecewise (bsi, do_unop, type, compute_type,
338 TREE_OPERAND (rhs, 0),
339 NULL_TREE, code);
340 else
341 return expand_vector_piecewise (bsi, do_binop, type, compute_type,
342 TREE_OPERAND (rhs, 0),
343 TREE_OPERAND (rhs, 1), code);
346 /* Return a type for the widest vector mode whose components are of mode
347 INNER_MODE, or NULL_TREE if none is found. */
348 static tree
349 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
351 enum machine_mode best_mode = VOIDmode, mode;
352 int best_nunits = 0;
354 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
355 mode = MIN_MODE_VECTOR_FLOAT;
356 else
357 mode = MIN_MODE_VECTOR_INT;
359 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
360 if (GET_MODE_INNER (mode) == inner_mode
361 && GET_MODE_NUNITS (mode) > best_nunits
362 && op->handlers[mode].insn_code != CODE_FOR_nothing)
363 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
365 if (best_mode == VOIDmode)
366 return NULL_TREE;
367 else
368 return lang_hooks.types.type_for_mode (best_mode, 1);
371 /* Process one statement. If we identify a vector operation, expand it. */
373 static void
374 expand_vector_operations_1 (block_stmt_iterator *bsi)
376 tree stmt = bsi_stmt (*bsi);
377 tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
378 enum tree_code code;
379 enum machine_mode compute_mode;
380 optab op;
382 switch (TREE_CODE (stmt))
384 case RETURN_EXPR:
385 stmt = TREE_OPERAND (stmt, 0);
386 if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
387 return;
389 /* FALLTHRU */
391 case MODIFY_EXPR:
392 p_lhs = &TREE_OPERAND (stmt, 0);
393 p_rhs = &TREE_OPERAND (stmt, 1);
394 lhs = *p_lhs;
395 rhs = *p_rhs;
396 break;
398 default:
399 return;
402 type = TREE_TYPE (rhs);
403 if (TREE_CODE (type) != VECTOR_TYPE)
404 return;
406 code = TREE_CODE (rhs);
407 if (TREE_CODE_CLASS (code) != tcc_unary
408 && TREE_CODE_CLASS (code) != tcc_binary)
409 return;
411 if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
412 return;
414 gcc_assert (code != CONVERT_EXPR);
415 op = optab_for_tree_code (code, type);
417 /* Optabs will try converting a negation into a subtraction, so
418 look for it as well. TODO: negation of floating-point vectors
419 might be turned into an exclusive OR toggling the sign bit. */
420 if (op == NULL
421 && code == NEGATE_EXPR
422 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
423 op = optab_for_tree_code (MINUS_EXPR, type);
425 /* For very wide vectors, try using a smaller vector mode. */
426 compute_type = type;
427 if (TYPE_MODE (type) == BLKmode && op)
429 tree vector_compute_type
430 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
431 if (vector_compute_type != NULL_TREE)
432 compute_type = vector_compute_type;
435 /* If we are breaking a BLKmode vector into smaller pieces,
436 type_for_widest_vector_mode has already looked into the optab,
437 so skip these checks. */
438 if (compute_type == type)
440 compute_mode = TYPE_MODE (compute_type);
441 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
442 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
443 && op != NULL
444 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
445 return;
446 else
447 /* There is no operation in hardware, so fall back to scalars. */
448 compute_type = TREE_TYPE (type);
451 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
452 rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
453 if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
454 *p_rhs = rhs;
455 else
457 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
458 be stored in memory anyway, so prefer NOP_EXPR. We should also try
459 performing the VIEW_CONVERT_EXPR on the left side of the
460 assignment. */
461 if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode)
462 *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs);
463 else
464 *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs);
467 mark_stmt_modified (bsi_stmt (*bsi));
470 /* Use this to lower vector operations introduced by the vectorizer,
471 if it may need the bit-twiddling tricks implemented in this file. */
473 static bool
474 gate_expand_vector_operations (void)
476 return flag_tree_vectorize != 0;
479 static void
480 expand_vector_operations (void)
482 block_stmt_iterator bsi;
483 basic_block bb;
485 FOR_EACH_BB (bb)
487 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
489 expand_vector_operations_1 (&bsi);
490 update_stmt_if_modified (bsi_stmt (bsi));
495 struct tree_opt_pass pass_lower_vector =
497 "veclower", /* name */
498 0, /* gate */
499 expand_vector_operations, /* execute */
500 NULL, /* sub */
501 NULL, /* next */
502 0, /* static_pass_number */
503 0, /* tv_id */
504 PROP_cfg, /* properties_required */
505 0, /* properties_provided */
506 0, /* properties_destroyed */
507 0, /* todo_flags_start */
508 TODO_dump_func | TODO_ggc_collect
509 | TODO_verify_stmts, /* todo_flags_finish */
510 0 /* letter */
513 struct tree_opt_pass pass_lower_vector_ssa =
515 "veclower2", /* name */
516 gate_expand_vector_operations, /* gate */
517 expand_vector_operations, /* execute */
518 NULL, /* sub */
519 NULL, /* next */
520 0, /* static_pass_number */
521 0, /* tv_id */
522 PROP_cfg, /* properties_required */
523 0, /* properties_provided */
524 0, /* properties_destroyed */
525 0, /* todo_flags_start */
526 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */
527 | TODO_verify_ssa
528 | TODO_verify_stmts | TODO_verify_flow,
529 0 /* letter */
532 #include "gt-tree-vect-generic.h"