1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
34 /* Need to include rtl.h, expr.h, etc. for optabs. */
38 /* Build a constant of type TYPE, made of VALUE's bits replicated
39 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
41 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
43 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
44 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
45 unsigned HOST_WIDE_INT low
, high
, mask
;
50 if (width
== HOST_BITS_PER_WIDE_INT
)
54 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
55 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
58 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
59 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
60 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
62 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
67 ret
= build_int_cst_wide (type
, low
, high
);
71 static GTY(()) tree vector_inner_type
;
72 static GTY(()) tree vector_last_type
;
73 static GTY(()) int vector_last_nunits
;
75 /* Return a suitable vector types made of SUBPARTS units each of mode
76 "word_mode" (the global variable). */
78 build_word_mode_vector_type (int nunits
)
80 if (!vector_inner_type
)
81 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
82 else if (vector_last_nunits
== nunits
)
84 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
85 return vector_last_type
;
88 /* We build a new type, but we canonicalize it nevertheless,
89 because it still saves some memory. */
90 vector_last_nunits
= nunits
;
91 vector_last_type
= type_hash_canon (nunits
,
92 build_vector_type (vector_inner_type
,
94 return vector_last_type
;
97 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
98 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
101 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
102 tree t
, tree bitsize
, tree bitpos
)
105 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
107 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
111 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
112 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
115 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
116 return gimplify_build1 (gsi
, code
, inner_type
, a
);
120 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
121 tree bitpos
, tree bitsize
, enum tree_code code
)
123 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
124 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
125 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
128 /* Expand vector addition to scalars. This does bit twiddling
129 in order to increase parallelism:
131 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
134 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
135 (a ^ ~b) & 0x80808080
137 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
139 This optimization should be done only if 4 vector items or more
142 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
143 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
146 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
147 unsigned HOST_WIDE_INT max
;
148 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
150 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
151 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
152 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
154 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
155 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
157 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
158 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
159 if (code
== PLUS_EXPR
)
160 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
163 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
164 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
167 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
168 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
169 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
173 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
174 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
175 tree bitsize ATTRIBUTE_UNUSED
,
176 enum tree_code code ATTRIBUTE_UNUSED
)
178 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
180 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
182 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
183 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
184 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
186 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
188 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
189 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
190 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
191 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
192 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
195 /* Expand a vector operation to scalars, by using many operations
196 whose type is the vector type's inner type. */
198 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
199 tree type
, tree inner_type
,
200 tree a
, tree b
, enum tree_code code
)
202 VEC(constructor_elt
,gc
) *v
;
203 tree part_width
= TYPE_SIZE (inner_type
);
204 tree index
= bitsize_int (0);
205 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
206 int delta
= tree_low_cst (part_width
, 1)
207 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
210 v
= VEC_alloc(constructor_elt
, gc
, (nunits
+ delta
- 1) / delta
);
211 for (i
= 0; i
< nunits
;
212 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
, 0))
214 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
);
215 constructor_elt
*ce
= VEC_quick_push (constructor_elt
, v
, NULL
);
216 ce
->index
= NULL_TREE
;
220 return build_constructor (type
, v
);
223 /* Expand a vector operation to scalars with the freedom to use
224 a scalar integer type, or to use a different size for the items
225 in the vector type. */
227 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
231 tree result
, compute_type
;
232 enum machine_mode mode
;
233 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
235 /* We have three strategies. If the type is already correct, just do
236 the operation an element at a time. Else, if the vector is wider than
237 one word, do it a word at a time; finally, if the vector is smaller
238 than one word, do it as a scalar. */
239 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
240 return expand_vector_piecewise (gsi
, f
,
241 type
, TREE_TYPE (type
),
243 else if (n_words
> 1)
245 tree word_type
= build_word_mode_vector_type (n_words
);
246 result
= expand_vector_piecewise (gsi
, f
,
247 word_type
, TREE_TYPE (word_type
),
249 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
254 /* Use a single scalar operation with a mode no wider than word_mode. */
255 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
256 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
257 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
263 /* Expand a vector operation to scalars; for integer types we can use
264 special bit twiddling tricks to do the sums a word at a time, using
265 function F_PARALLEL instead of F. These tricks are done only if
266 they can process at least four items, that is, only if the vector
267 holds at least four items and if a word can hold four items. */
269 expand_vector_addition (gimple_stmt_iterator
*gsi
,
270 elem_op_func f
, elem_op_func f_parallel
,
271 tree type
, tree a
, tree b
, enum tree_code code
)
273 int parts_per_word
= UNITS_PER_WORD
274 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
276 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
277 && parts_per_word
>= 4
278 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
279 return expand_vector_parallel (gsi
, f_parallel
,
282 return expand_vector_piecewise (gsi
, f
,
283 type
, TREE_TYPE (type
),
288 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
289 gimple assign
, enum tree_code code
)
291 enum machine_mode compute_mode
= TYPE_MODE (compute_type
);
293 /* If the compute mode is not a vector mode (hence we are not decomposing
294 a BLKmode vector to smaller, hardware-supported vectors), we may want
295 to expand the operations in parallel. */
296 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
297 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
298 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
299 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
300 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
301 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
306 if (!TYPE_OVERFLOW_TRAPS (type
))
307 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
308 gimple_assign_rhs1 (assign
),
309 gimple_assign_rhs2 (assign
), code
);
313 if (!TYPE_OVERFLOW_TRAPS (type
))
314 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
315 gimple_assign_rhs1 (assign
),
322 return expand_vector_parallel (gsi
, do_binop
, type
,
323 gimple_assign_rhs1 (assign
),
324 gimple_assign_rhs2 (assign
), code
);
327 return expand_vector_parallel (gsi
, do_unop
, type
,
328 gimple_assign_rhs1 (assign
),
335 if (TREE_CODE_CLASS (code
) == tcc_unary
)
336 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
337 gimple_assign_rhs1 (assign
),
340 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
341 gimple_assign_rhs1 (assign
),
342 gimple_assign_rhs2 (assign
), code
);
345 /* Return a type for the widest vector mode whose components are of mode
346 INNER_MODE, or NULL_TREE if none is found.
347 SATP is true for saturating fixed-point types. */
350 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
, int satp
)
352 enum machine_mode best_mode
= VOIDmode
, mode
;
355 if (SCALAR_FLOAT_MODE_P (inner_mode
))
356 mode
= MIN_MODE_VECTOR_FLOAT
;
357 else if (SCALAR_FRACT_MODE_P (inner_mode
))
358 mode
= MIN_MODE_VECTOR_FRACT
;
359 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
360 mode
= MIN_MODE_VECTOR_UFRACT
;
361 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
362 mode
= MIN_MODE_VECTOR_ACCUM
;
363 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
364 mode
= MIN_MODE_VECTOR_UACCUM
;
366 mode
= MIN_MODE_VECTOR_INT
;
368 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
369 if (GET_MODE_INNER (mode
) == inner_mode
370 && GET_MODE_NUNITS (mode
) > best_nunits
371 && optab_handler (op
, mode
) != CODE_FOR_nothing
)
372 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
374 if (best_mode
== VOIDmode
)
378 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */
379 if (ALL_FIXED_POINT_MODE_P (best_mode
))
380 return lang_hooks
.types
.type_for_mode (best_mode
, satp
);
382 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
386 /* Process one statement. If we identify a vector operation, expand it. */
389 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
391 gimple stmt
= gsi_stmt (*gsi
);
392 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
;
394 enum machine_mode compute_mode
;
396 enum gimple_rhs_class rhs_class
;
399 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
402 code
= gimple_assign_rhs_code (stmt
);
403 rhs_class
= get_gimple_rhs_class (code
);
405 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
408 lhs
= gimple_assign_lhs (stmt
);
409 rhs1
= gimple_assign_rhs1 (stmt
);
410 type
= gimple_expr_type (stmt
);
411 if (rhs_class
== GIMPLE_BINARY_RHS
)
412 rhs2
= gimple_assign_rhs2 (stmt
);
414 if (TREE_CODE (type
) != VECTOR_TYPE
)
418 || code
== FLOAT_EXPR
419 || code
== FIX_TRUNC_EXPR
420 || code
== VIEW_CONVERT_EXPR
)
423 gcc_assert (code
!= CONVERT_EXPR
);
425 /* The signedness is determined from input argument. */
426 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
427 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
428 type
= TREE_TYPE (rhs1
);
430 /* Choose between vector shift/rotate by vector and vector shift/rotate by
432 if (code
== LSHIFT_EXPR
433 || code
== RSHIFT_EXPR
434 || code
== LROTATE_EXPR
435 || code
== RROTATE_EXPR
)
437 /* If the 2nd argument is vector, we need a vector/vector shift */
438 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2
))))
439 op
= optab_for_tree_code (code
, type
, optab_vector
);
442 /* Try for a vector/scalar shift, and if we don't have one, see if we
443 have a vector/vector shift */
444 op
= optab_for_tree_code (code
, type
, optab_scalar
);
446 || optab_handler (op
, TYPE_MODE (type
)) == CODE_FOR_nothing
)
447 op
= optab_for_tree_code (code
, type
, optab_vector
);
451 op
= optab_for_tree_code (code
, type
, optab_default
);
453 /* For widening/narrowing vector operations, the relevant type is of the
454 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
455 calculated in the same way above. */
456 if (code
== WIDEN_SUM_EXPR
457 || code
== VEC_WIDEN_MULT_HI_EXPR
458 || code
== VEC_WIDEN_MULT_LO_EXPR
459 || code
== VEC_UNPACK_HI_EXPR
460 || code
== VEC_UNPACK_LO_EXPR
461 || code
== VEC_PACK_TRUNC_EXPR
462 || code
== VEC_PACK_SAT_EXPR
463 || code
== VEC_PACK_FIX_TRUNC_EXPR
)
464 type
= TREE_TYPE (rhs1
);
466 /* Optabs will try converting a negation into a subtraction, so
467 look for it as well. TODO: negation of floating-point vectors
468 might be turned into an exclusive OR toggling the sign bit. */
470 && code
== NEGATE_EXPR
471 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
472 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
474 /* For very wide vectors, try using a smaller vector mode. */
476 if (TYPE_MODE (type
) == BLKmode
&& op
)
478 tree vector_compute_type
479 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
,
480 TYPE_SATURATING (TREE_TYPE (type
)));
481 if (vector_compute_type
!= NULL_TREE
482 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
483 < TYPE_VECTOR_SUBPARTS (compute_type
)))
484 compute_type
= vector_compute_type
;
487 /* If we are breaking a BLKmode vector into smaller pieces,
488 type_for_widest_vector_mode has already looked into the optab,
489 so skip these checks. */
490 if (compute_type
== type
)
492 compute_mode
= TYPE_MODE (compute_type
);
493 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
494 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
495 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FRACT
496 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_UFRACT
497 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_ACCUM
498 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_UACCUM
)
500 && optab_handler (op
, compute_mode
) != CODE_FOR_nothing
)
503 /* There is no operation in hardware, so fall back to scalars. */
504 compute_type
= TREE_TYPE (type
);
507 gcc_assert (code
!= VEC_LSHIFT_EXPR
&& code
!= VEC_RSHIFT_EXPR
);
508 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
509 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
510 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
513 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
514 way to do it is change expand_vector_operation and its callees to
515 return a tree_code, RHS1 and RHS2 instead of a tree. */
516 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
518 gimple_set_modified (gsi_stmt (*gsi
), true);
521 /* Use this to lower vector operations introduced by the vectorizer,
522 if it may need the bit-twiddling tricks implemented in this file. */
525 gate_expand_vector_operations (void)
527 return flag_tree_vectorize
!= 0;
531 expand_vector_operations (void)
533 gimple_stmt_iterator gsi
;
538 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
540 expand_vector_operations_1 (&gsi
);
541 update_stmt_if_modified (gsi_stmt (gsi
));
547 struct gimple_opt_pass pass_lower_vector
=
551 "veclower", /* name */
553 expand_vector_operations
, /* execute */
556 0, /* static_pass_number */
558 PROP_cfg
, /* properties_required */
559 0, /* properties_provided */
560 0, /* properties_destroyed */
561 0, /* todo_flags_start */
562 TODO_dump_func
| TODO_ggc_collect
563 | TODO_verify_stmts
/* todo_flags_finish */
567 struct gimple_opt_pass pass_lower_vector_ssa
=
571 "veclower2", /* name */
572 gate_expand_vector_operations
, /* gate */
573 expand_vector_operations
, /* execute */
576 0, /* static_pass_number */
578 PROP_cfg
, /* properties_required */
579 0, /* properties_provided */
580 0, /* properties_destroyed */
581 0, /* todo_flags_start */
582 TODO_dump_func
| TODO_update_ssa
/* todo_flags_finish */
584 | TODO_verify_stmts
| TODO_verify_flow
588 #include "gt-tree-vect-generic.h"