1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
41 /* Build a constant of type TYPE, made of VALUE's bits replicated
42 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
44 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
46 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
47 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
48 unsigned HOST_WIDE_INT low
, high
, mask
;
53 if (width
== HOST_BITS_PER_WIDE_INT
)
57 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
58 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
61 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
62 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
63 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
65 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
70 ret
= build_int_cst_wide (type
, low
, high
);
74 static GTY(()) tree vector_inner_type
;
75 static GTY(()) tree vector_last_type
;
76 static GTY(()) int vector_last_nunits
;
78 /* Return a suitable vector types made of SUBPARTS units each of mode
79 "word_mode" (the global variable). */
81 build_word_mode_vector_type (int nunits
)
83 if (!vector_inner_type
)
84 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
85 else if (vector_last_nunits
== nunits
)
87 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
88 return vector_last_type
;
91 /* We build a new type, but we canonicalize it nevertheless,
92 because it still saves some memory. */
93 vector_last_nunits
= nunits
;
94 vector_last_type
= type_hash_canon (nunits
,
95 build_vector_type (vector_inner_type
,
97 return vector_last_type
;
100 typedef tree (*elem_op_func
) (block_stmt_iterator
*,
101 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
104 tree_vec_extract (block_stmt_iterator
*bsi
, tree type
,
105 tree t
, tree bitsize
, tree bitpos
)
108 return gimplify_build3 (bsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
110 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
111 anyway be stored in memory, so prefer NOP_EXPR. */
112 else if (TYPE_MODE (type
) == BLKmode
)
113 return gimplify_build1 (bsi
, VIEW_CONVERT_EXPR
, type
, t
);
115 return gimplify_build1 (bsi
, NOP_EXPR
, type
, t
);
119 do_unop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
,
120 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
123 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
124 return gimplify_build1 (bsi
, code
, inner_type
, a
);
128 do_binop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
, tree b
,
129 tree bitpos
, tree bitsize
, enum tree_code code
)
131 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
132 b
= tree_vec_extract (bsi
, inner_type
, b
, bitsize
, bitpos
);
133 return gimplify_build2 (bsi
, code
, inner_type
, a
, b
);
136 /* Expand vector addition to scalars. This does bit twiddling
137 in order to increase parallelism:
139 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
142 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
143 (a ^ ~b) & 0x80808080
145 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
147 This optimization should be done only if 4 vector items or more
150 do_plus_minus (block_stmt_iterator
*bsi
, tree word_type
, tree a
, tree b
,
151 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
154 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
155 unsigned HOST_WIDE_INT max
;
156 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
158 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
159 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
160 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
162 a
= tree_vec_extract (bsi
, word_type
, a
, bitsize
, bitpos
);
163 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
165 signs
= gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
166 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
167 if (code
== PLUS_EXPR
)
168 a_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
171 a_low
= gimplify_build2 (bsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
172 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, signs
);
175 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
176 result_low
= gimplify_build2 (bsi
, code
, word_type
, a_low
, b_low
);
177 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
181 do_negate (block_stmt_iterator
*bsi
, tree word_type
, tree b
,
182 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
183 tree bitsize ATTRIBUTE_UNUSED
,
184 enum tree_code code ATTRIBUTE_UNUSED
)
186 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
188 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
190 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
191 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
192 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
194 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
196 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
197 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, b
);
198 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
199 result_low
= gimplify_build2 (bsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
200 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
203 /* Expand a vector operation to scalars, by using many operations
204 whose type is the vector type's inner type. */
206 expand_vector_piecewise (block_stmt_iterator
*bsi
, elem_op_func f
,
207 tree type
, tree inner_type
,
208 tree a
, tree b
, enum tree_code code
)
210 VEC(constructor_elt
,gc
) *v
;
211 tree part_width
= TYPE_SIZE (inner_type
);
212 tree index
= bitsize_int (0);
213 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
214 int delta
= tree_low_cst (part_width
, 1)
215 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
218 v
= VEC_alloc(constructor_elt
, gc
, (nunits
+ delta
- 1) / delta
);
219 for (i
= 0; i
< nunits
;
220 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
, 0))
222 tree result
= f (bsi
, inner_type
, a
, b
, index
, part_width
, code
);
223 constructor_elt
*ce
= VEC_quick_push (constructor_elt
, v
, NULL
);
224 ce
->index
= NULL_TREE
;
228 return build_constructor (type
, v
);
231 /* Expand a vector operation to scalars with the freedom to use
232 a scalar integer type, or to use a different size for the items
233 in the vector type. */
235 expand_vector_parallel (block_stmt_iterator
*bsi
, elem_op_func f
, tree type
,
239 tree result
, compute_type
;
240 enum machine_mode mode
;
241 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
243 /* We have three strategies. If the type is already correct, just do
244 the operation an element at a time. Else, if the vector is wider than
245 one word, do it a word at a time; finally, if the vector is smaller
246 than one word, do it as a scalar. */
247 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
248 return expand_vector_piecewise (bsi
, f
,
249 type
, TREE_TYPE (type
),
251 else if (n_words
> 1)
253 tree word_type
= build_word_mode_vector_type (n_words
);
254 result
= expand_vector_piecewise (bsi
, f
,
255 word_type
, TREE_TYPE (word_type
),
257 result
= gimplify_val (bsi
, word_type
, result
);
261 /* Use a single scalar operation with a mode no wider than word_mode. */
262 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
263 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
264 result
= f (bsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
270 /* Expand a vector operation to scalars; for integer types we can use
271 special bit twiddling tricks to do the sums a word at a time, using
272 function F_PARALLEL instead of F. These tricks are done only if
273 they can process at least four items, that is, only if the vector
274 holds at least four items and if a word can hold four items. */
276 expand_vector_addition (block_stmt_iterator
*bsi
,
277 elem_op_func f
, elem_op_func f_parallel
,
278 tree type
, tree a
, tree b
, enum tree_code code
)
280 int parts_per_word
= UNITS_PER_WORD
281 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
283 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
284 && parts_per_word
>= 4
285 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
286 return expand_vector_parallel (bsi
, f_parallel
,
289 return expand_vector_piecewise (bsi
, f
,
290 type
, TREE_TYPE (type
),
295 expand_vector_operation (block_stmt_iterator
*bsi
, tree type
, tree compute_type
,
296 tree rhs
, enum tree_code code
)
298 enum machine_mode compute_mode
= TYPE_MODE (compute_type
);
300 /* If the compute mode is not a vector mode (hence we are not decomposing
301 a BLKmode vector to smaller, hardware-supported vectors), we may want
302 to expand the operations in parallel. */
303 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
304 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
)
309 if (!TYPE_TRAP_SIGNED (type
))
310 return expand_vector_addition (bsi
, do_binop
, do_plus_minus
, type
,
311 TREE_OPERAND (rhs
, 0),
312 TREE_OPERAND (rhs
, 1), code
);
316 if (!TYPE_TRAP_SIGNED (type
))
317 return expand_vector_addition (bsi
, do_unop
, do_negate
, type
,
318 TREE_OPERAND (rhs
, 0),
325 return expand_vector_parallel (bsi
, do_binop
, type
,
326 TREE_OPERAND (rhs
, 0),
327 TREE_OPERAND (rhs
, 1), code
);
330 return expand_vector_parallel (bsi
, do_unop
, type
,
331 TREE_OPERAND (rhs
, 0),
338 if (TREE_CODE_CLASS (code
) == tcc_unary
)
339 return expand_vector_piecewise (bsi
, do_unop
, type
, compute_type
,
340 TREE_OPERAND (rhs
, 0),
343 return expand_vector_piecewise (bsi
, do_binop
, type
, compute_type
,
344 TREE_OPERAND (rhs
, 0),
345 TREE_OPERAND (rhs
, 1), code
);
348 /* Return a type for the widest vector mode whose components are of mode
349 INNER_MODE, or NULL_TREE if none is found. */
351 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
)
353 enum machine_mode best_mode
= VOIDmode
, mode
;
356 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
357 mode
= MIN_MODE_VECTOR_FLOAT
;
359 mode
= MIN_MODE_VECTOR_INT
;
361 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
362 if (GET_MODE_INNER (mode
) == inner_mode
363 && GET_MODE_NUNITS (mode
) > best_nunits
364 && op
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
365 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
367 if (best_mode
== VOIDmode
)
370 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
373 /* Process one statement. If we identify a vector operation, expand it. */
376 expand_vector_operations_1 (block_stmt_iterator
*bsi
)
378 tree stmt
= bsi_stmt (*bsi
);
379 tree
*p_lhs
, *p_rhs
, lhs
, rhs
, type
, compute_type
;
381 enum machine_mode compute_mode
;
384 switch (TREE_CODE (stmt
))
387 stmt
= TREE_OPERAND (stmt
, 0);
388 if (!stmt
|| TREE_CODE (stmt
) != MODIFY_EXPR
)
394 p_lhs
= &TREE_OPERAND (stmt
, 0);
395 p_rhs
= &TREE_OPERAND (stmt
, 1);
404 type
= TREE_TYPE (rhs
);
405 if (TREE_CODE (type
) != VECTOR_TYPE
)
408 code
= TREE_CODE (rhs
);
409 if (TREE_CODE_CLASS (code
) != tcc_unary
410 && TREE_CODE_CLASS (code
) != tcc_binary
)
413 if (code
== NOP_EXPR
|| code
== VIEW_CONVERT_EXPR
)
416 gcc_assert (code
!= CONVERT_EXPR
);
417 op
= optab_for_tree_code (code
, type
);
419 /* Optabs will try converting a negation into a subtraction, so
420 look for it as well. TODO: negation of floating-point vectors
421 might be turned into an exclusive OR toggling the sign bit. */
423 && code
== NEGATE_EXPR
424 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
425 op
= optab_for_tree_code (MINUS_EXPR
, type
);
427 /* For very wide vectors, try using a smaller vector mode. */
429 if (TYPE_MODE (type
) == BLKmode
&& op
)
431 tree vector_compute_type
432 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
);
433 if (vector_compute_type
!= NULL_TREE
)
434 compute_type
= vector_compute_type
;
437 /* If we are breaking a BLKmode vector into smaller pieces,
438 type_for_widest_vector_mode has already looked into the optab,
439 so skip these checks. */
440 if (compute_type
== type
)
442 compute_mode
= TYPE_MODE (compute_type
);
443 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
444 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
)
446 && op
->handlers
[compute_mode
].insn_code
!= CODE_FOR_nothing
)
449 /* There is no operation in hardware, so fall back to scalars. */
450 compute_type
= TREE_TYPE (type
);
453 gcc_assert (code
!= VEC_LSHIFT_EXPR
&& code
!= VEC_RSHIFT_EXPR
);
454 rhs
= expand_vector_operation (bsi
, type
, compute_type
, rhs
, code
);
455 if (lang_hooks
.types_compatible_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
459 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
460 be stored in memory anyway, so prefer NOP_EXPR. We should also try
461 performing the VIEW_CONVERT_EXPR on the left side of the
463 if (TYPE_MODE (TREE_TYPE (rhs
)) == BLKmode
)
464 *p_rhs
= gimplify_build1 (bsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
), rhs
);
466 *p_rhs
= gimplify_build1 (bsi
, NOP_EXPR
, TREE_TYPE (lhs
), rhs
);
469 mark_stmt_modified (bsi_stmt (*bsi
));
472 /* Use this to lower vector operations introduced by the vectorizer,
473 if it may need the bit-twiddling tricks implemented in this file. */
476 gate_expand_vector_operations (void)
478 return flag_tree_vectorize
!= 0;
482 expand_vector_operations (void)
484 block_stmt_iterator bsi
;
489 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
491 expand_vector_operations_1 (&bsi
);
492 update_stmt_if_modified (bsi_stmt (bsi
));
497 struct tree_opt_pass pass_lower_vector
=
499 "veclower", /* name */
501 expand_vector_operations
, /* execute */
504 0, /* static_pass_number */
506 PROP_cfg
, /* properties_required */
507 0, /* properties_provided */
508 0, /* properties_destroyed */
509 0, /* todo_flags_start */
510 TODO_dump_func
| TODO_ggc_collect
511 | TODO_verify_stmts
, /* todo_flags_finish */
515 struct tree_opt_pass pass_lower_vector_ssa
=
517 "veclower2", /* name */
518 gate_expand_vector_operations
, /* gate */
519 expand_vector_operations
, /* execute */
522 0, /* static_pass_number */
524 PROP_cfg
, /* properties_required */
525 0, /* properties_provided */
526 0, /* properties_destroyed */
527 0, /* todo_flags_start */
528 TODO_dump_func
| TODO_update_ssa
/* todo_flags_finish */
530 | TODO_verify_stmts
| TODO_verify_flow
,
534 #include "gt-tree-vect-generic.h"