1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
41 /* Build a constant of type TYPE, made of VALUE's bits replicated
42 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
44 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
46 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
47 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
48 unsigned HOST_WIDE_INT low
, high
, mask
;
53 if (width
== HOST_BITS_PER_WIDE_INT
)
57 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
58 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
61 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
62 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
63 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
65 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
70 ret
= build_int_cst_wide (type
, low
, high
);
74 static GTY(()) tree vector_inner_type
;
75 static GTY(()) tree vector_last_type
;
76 static GTY(()) int vector_last_nunits
;
78 /* Return a suitable vector types made of SUBPARTS units each of mode
79 "word_mode" (the global variable). */
81 build_word_mode_vector_type (int nunits
)
83 if (!vector_inner_type
)
84 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
85 else if (vector_last_nunits
== nunits
)
87 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
88 return vector_last_type
;
91 /* We build a new type, but we canonicalize it nevertheless,
92 because it still saves some memory. */
93 vector_last_nunits
= nunits
;
94 vector_last_type
= type_hash_canon (nunits
,
95 build_vector_type (vector_inner_type
,
97 return vector_last_type
;
100 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
101 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
104 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
105 tree t
, tree bitsize
, tree bitpos
)
108 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
110 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
114 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
115 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
118 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
119 return gimplify_build1 (gsi
, code
, inner_type
, a
);
123 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
124 tree bitpos
, tree bitsize
, enum tree_code code
)
126 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
127 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
128 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
131 /* Expand vector addition to scalars. This does bit twiddling
132 in order to increase parallelism:
134 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
137 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
138 (a ^ ~b) & 0x80808080
140 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
142 This optimization should be done only if 4 vector items or more
145 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
146 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
149 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
150 unsigned HOST_WIDE_INT max
;
151 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
153 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
154 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
155 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
157 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
158 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
160 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
161 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
162 if (code
== PLUS_EXPR
)
163 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
166 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
167 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
170 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
171 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
172 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
176 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
177 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
178 tree bitsize ATTRIBUTE_UNUSED
,
179 enum tree_code code ATTRIBUTE_UNUSED
)
181 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
183 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
185 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
186 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
187 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
189 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
191 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
192 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
193 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
194 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
195 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
198 /* Expand a vector operation to scalars, by using many operations
199 whose type is the vector type's inner type. */
201 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
202 tree type
, tree inner_type
,
203 tree a
, tree b
, enum tree_code code
)
205 VEC(constructor_elt
,gc
) *v
;
206 tree part_width
= TYPE_SIZE (inner_type
);
207 tree index
= bitsize_int (0);
208 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
209 int delta
= tree_low_cst (part_width
, 1)
210 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
213 v
= VEC_alloc(constructor_elt
, gc
, (nunits
+ delta
- 1) / delta
);
214 for (i
= 0; i
< nunits
;
215 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
, 0))
217 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
);
218 constructor_elt
*ce
= VEC_quick_push (constructor_elt
, v
, NULL
);
219 ce
->index
= NULL_TREE
;
223 return build_constructor (type
, v
);
226 /* Expand a vector operation to scalars with the freedom to use
227 a scalar integer type, or to use a different size for the items
228 in the vector type. */
230 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
234 tree result
, compute_type
;
235 enum machine_mode mode
;
236 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
238 /* We have three strategies. If the type is already correct, just do
239 the operation an element at a time. Else, if the vector is wider than
240 one word, do it a word at a time; finally, if the vector is smaller
241 than one word, do it as a scalar. */
242 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
243 return expand_vector_piecewise (gsi
, f
,
244 type
, TREE_TYPE (type
),
246 else if (n_words
> 1)
248 tree word_type
= build_word_mode_vector_type (n_words
);
249 result
= expand_vector_piecewise (gsi
, f
,
250 word_type
, TREE_TYPE (word_type
),
252 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
257 /* Use a single scalar operation with a mode no wider than word_mode. */
258 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
259 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
260 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
266 /* Expand a vector operation to scalars; for integer types we can use
267 special bit twiddling tricks to do the sums a word at a time, using
268 function F_PARALLEL instead of F. These tricks are done only if
269 they can process at least four items, that is, only if the vector
270 holds at least four items and if a word can hold four items. */
272 expand_vector_addition (gimple_stmt_iterator
*gsi
,
273 elem_op_func f
, elem_op_func f_parallel
,
274 tree type
, tree a
, tree b
, enum tree_code code
)
276 int parts_per_word
= UNITS_PER_WORD
277 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
279 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
280 && parts_per_word
>= 4
281 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
282 return expand_vector_parallel (gsi
, f_parallel
,
285 return expand_vector_piecewise (gsi
, f
,
286 type
, TREE_TYPE (type
),
291 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
292 gimple assign
, enum tree_code code
)
294 enum machine_mode compute_mode
= TYPE_MODE (compute_type
);
296 /* If the compute mode is not a vector mode (hence we are not decomposing
297 a BLKmode vector to smaller, hardware-supported vectors), we may want
298 to expand the operations in parallel. */
299 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
300 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
301 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
302 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
303 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
304 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
309 if (!TYPE_OVERFLOW_TRAPS (type
))
310 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
311 gimple_assign_rhs1 (assign
),
312 gimple_assign_rhs2 (assign
), code
);
316 if (!TYPE_OVERFLOW_TRAPS (type
))
317 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
318 gimple_assign_rhs1 (assign
),
325 return expand_vector_parallel (gsi
, do_binop
, type
,
326 gimple_assign_rhs1 (assign
),
327 gimple_assign_rhs2 (assign
), code
);
330 return expand_vector_parallel (gsi
, do_unop
, type
,
331 gimple_assign_rhs1 (assign
),
338 if (TREE_CODE_CLASS (code
) == tcc_unary
)
339 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
340 gimple_assign_rhs1 (assign
),
343 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
344 gimple_assign_rhs1 (assign
),
345 gimple_assign_rhs2 (assign
), code
);
348 /* Return a type for the widest vector mode whose components are of mode
349 INNER_MODE, or NULL_TREE if none is found.
350 SATP is true for saturating fixed-point types. */
353 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
, int satp
)
355 enum machine_mode best_mode
= VOIDmode
, mode
;
358 if (SCALAR_FLOAT_MODE_P (inner_mode
))
359 mode
= MIN_MODE_VECTOR_FLOAT
;
360 else if (SCALAR_FRACT_MODE_P (inner_mode
))
361 mode
= MIN_MODE_VECTOR_FRACT
;
362 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
363 mode
= MIN_MODE_VECTOR_UFRACT
;
364 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
365 mode
= MIN_MODE_VECTOR_ACCUM
;
366 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
367 mode
= MIN_MODE_VECTOR_UACCUM
;
369 mode
= MIN_MODE_VECTOR_INT
;
371 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
372 if (GET_MODE_INNER (mode
) == inner_mode
373 && GET_MODE_NUNITS (mode
) > best_nunits
374 && optab_handler (op
, mode
)->insn_code
!= CODE_FOR_nothing
)
375 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
377 if (best_mode
== VOIDmode
)
381 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */
382 if (ALL_FIXED_POINT_MODE_P (best_mode
))
383 return lang_hooks
.types
.type_for_mode (best_mode
, satp
);
385 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
389 /* Process one statement. If we identify a vector operation, expand it. */
392 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
394 gimple stmt
= gsi_stmt (*gsi
);
395 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
;
397 enum machine_mode compute_mode
;
399 enum gimple_rhs_class rhs_class
;
402 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
405 code
= gimple_assign_rhs_code (stmt
);
406 rhs_class
= get_gimple_rhs_class (code
);
408 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
411 lhs
= gimple_assign_lhs (stmt
);
412 rhs1
= gimple_assign_rhs1 (stmt
);
413 type
= gimple_expr_type (stmt
);
414 if (rhs_class
== GIMPLE_BINARY_RHS
)
415 rhs2
= gimple_assign_rhs2 (stmt
);
417 if (TREE_CODE (type
) != VECTOR_TYPE
)
421 || code
== FLOAT_EXPR
422 || code
== FIX_TRUNC_EXPR
423 || code
== VIEW_CONVERT_EXPR
)
426 gcc_assert (code
!= CONVERT_EXPR
);
428 /* The signedness is determined from input argument. */
429 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
430 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
431 type
= TREE_TYPE (rhs1
);
433 /* Choose between vector shift/rotate by vector and vector shift/rotate by
435 if (code
== LSHIFT_EXPR
436 || code
== RSHIFT_EXPR
437 || code
== LROTATE_EXPR
438 || code
== RROTATE_EXPR
)
440 /* If the 2nd argument is vector, we need a vector/vector shift */
441 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2
))))
442 op
= optab_for_tree_code (code
, type
, optab_vector
);
445 /* Try for a vector/scalar shift, and if we don't have one, see if we
446 have a vector/vector shift */
447 op
= optab_for_tree_code (code
, type
, optab_scalar
);
449 || (op
->handlers
[(int) TYPE_MODE (type
)].insn_code
450 == CODE_FOR_nothing
))
451 op
= optab_for_tree_code (code
, type
, optab_vector
);
455 op
= optab_for_tree_code (code
, type
, optab_default
);
457 /* For widening/narrowing vector operations, the relevant type is of the
458 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
459 calculated in the same way above. */
460 if (code
== WIDEN_SUM_EXPR
461 || code
== VEC_WIDEN_MULT_HI_EXPR
462 || code
== VEC_WIDEN_MULT_LO_EXPR
463 || code
== VEC_UNPACK_HI_EXPR
464 || code
== VEC_UNPACK_LO_EXPR
465 || code
== VEC_PACK_TRUNC_EXPR
466 || code
== VEC_PACK_SAT_EXPR
467 || code
== VEC_PACK_FIX_TRUNC_EXPR
)
468 type
= TREE_TYPE (rhs1
);
470 /* Optabs will try converting a negation into a subtraction, so
471 look for it as well. TODO: negation of floating-point vectors
472 might be turned into an exclusive OR toggling the sign bit. */
474 && code
== NEGATE_EXPR
475 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
476 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
478 /* For very wide vectors, try using a smaller vector mode. */
480 if (TYPE_MODE (type
) == BLKmode
&& op
)
482 tree vector_compute_type
483 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
,
484 TYPE_SATURATING (TREE_TYPE (type
)));
485 if (vector_compute_type
!= NULL_TREE
486 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
487 < TYPE_VECTOR_SUBPARTS (compute_type
)))
488 compute_type
= vector_compute_type
;
491 /* If we are breaking a BLKmode vector into smaller pieces,
492 type_for_widest_vector_mode has already looked into the optab,
493 so skip these checks. */
494 if (compute_type
== type
)
496 compute_mode
= TYPE_MODE (compute_type
);
497 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
498 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
499 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FRACT
500 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_UFRACT
501 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_ACCUM
502 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_UACCUM
)
504 && optab_handler (op
, compute_mode
)->insn_code
!= CODE_FOR_nothing
)
507 /* There is no operation in hardware, so fall back to scalars. */
508 compute_type
= TREE_TYPE (type
);
511 gcc_assert (code
!= VEC_LSHIFT_EXPR
&& code
!= VEC_RSHIFT_EXPR
);
512 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
513 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
514 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
517 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
518 way to do it is change expand_vector_operation and its callees to
519 return a tree_code, RHS1 and RHS2 instead of a tree. */
520 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
522 gimple_set_modified (gsi_stmt (*gsi
), true);
525 /* Use this to lower vector operations introduced by the vectorizer,
526 if it may need the bit-twiddling tricks implemented in this file. */
529 gate_expand_vector_operations (void)
531 return flag_tree_vectorize
!= 0;
535 expand_vector_operations (void)
537 gimple_stmt_iterator gsi
;
542 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
544 expand_vector_operations_1 (&gsi
);
545 update_stmt_if_modified (gsi_stmt (gsi
));
551 struct gimple_opt_pass pass_lower_vector
=
555 "veclower", /* name */
557 expand_vector_operations
, /* execute */
560 0, /* static_pass_number */
562 PROP_cfg
, /* properties_required */
563 0, /* properties_provided */
564 0, /* properties_destroyed */
565 0, /* todo_flags_start */
566 TODO_dump_func
| TODO_ggc_collect
567 | TODO_verify_stmts
/* todo_flags_finish */
571 struct gimple_opt_pass pass_lower_vector_ssa
=
575 "veclower2", /* name */
576 gate_expand_vector_operations
, /* gate */
577 expand_vector_operations
, /* execute */
580 0, /* static_pass_number */
582 PROP_cfg
, /* properties_required */
583 0, /* properties_provided */
584 0, /* properties_destroyed */
585 0, /* todo_flags_start */
586 TODO_dump_func
| TODO_update_ssa
/* todo_flags_finish */
588 | TODO_verify_stmts
| TODO_verify_flow
592 #include "gt-tree-vect-generic.h"