/* Lower complex number and vector operations to scalar operations.
   Copyright (C) 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
23 #include "coretypes.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
40 /* Extract the real or imaginary part of a complex variable or constant.
41 Make sure that it's a proper gimple_val and gimplify it if not.
42 Emit any new code before BSI. */
45 extract_component (block_stmt_iterator
*bsi
, tree t
, bool imagpart_p
)
49 inner_type
= TREE_TYPE (TREE_TYPE (t
));
50 switch (TREE_CODE (t
))
53 ret
= (imagpart_p
? TREE_IMAGPART (t
) : TREE_REALPART (t
));
57 ret
= TREE_OPERAND (t
, imagpart_p
);
62 ret
= build1 ((imagpart_p
? IMAGPART_EXPR
: REALPART_EXPR
),
70 return gimplify_val (bsi
, inner_type
, ret
);
73 /* Update an assignment to a complex variable in place. */
76 update_complex_assignment (block_stmt_iterator
*bsi
, tree r
, tree i
)
78 tree stmt
= bsi_stmt (*bsi
);
82 if (TREE_CODE (stmt
) == RETURN_EXPR
)
83 stmt
= TREE_OPERAND (stmt
, 0);
85 type
= TREE_TYPE (TREE_OPERAND (stmt
, 1));
86 TREE_OPERAND (stmt
, 1) = build (COMPLEX_EXPR
, type
, r
, i
);
89 /* Expand complex addition to scalars:
90 a + b = (ar + br) + i(ai + bi)
91 a - b = (ar - br) + i(ai + bi)
95 expand_complex_addition (block_stmt_iterator
*bsi
, tree inner_type
,
96 tree ar
, tree ai
, tree br
, tree bi
,
101 rr
= gimplify_build2 (bsi
, code
, inner_type
, ar
, br
);
102 ri
= gimplify_build2 (bsi
, code
, inner_type
, ai
, bi
);
104 update_complex_assignment (bsi
, rr
, ri
);
107 /* Expand complex multiplication to scalars:
108 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
112 expand_complex_multiplication (block_stmt_iterator
*bsi
, tree inner_type
,
113 tree ar
, tree ai
, tree br
, tree bi
)
115 tree t1
, t2
, t3
, t4
, rr
, ri
;
117 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
118 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
119 t3
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
121 /* Avoid expanding redundant multiplication for the common
122 case of squaring a complex number. */
123 if (ar
== br
&& ai
== bi
)
126 t4
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
128 rr
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
129 ri
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t3
, t4
);
131 update_complex_assignment (bsi
, rr
, ri
);
134 /* Expand complex division to scalars, straightforward algorithm.
135 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
140 expand_complex_div_straight (block_stmt_iterator
*bsi
, tree inner_type
,
141 tree ar
, tree ai
, tree br
, tree bi
,
144 tree rr
, ri
, div
, t1
, t2
, t3
;
146 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, br
, br
);
147 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, bi
, bi
);
148 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
150 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
151 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
152 t3
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
153 rr
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
155 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
156 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
157 t3
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
158 ri
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
160 update_complex_assignment (bsi
, rr
, ri
);
163 /* Expand complex division to scalars, modified algorithm to minimize
164 overflow with wide input ranges. */
167 expand_complex_div_wide (block_stmt_iterator
*bsi
, tree inner_type
,
168 tree ar
, tree ai
, tree br
, tree bi
,
171 tree rr
, ri
, ratio
, div
, t1
, t2
, min
, max
, cond
;
173 /* Examine |br| < |bi|, and branch. */
174 t1
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, br
);
175 t2
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, bi
);
176 cond
= fold (build (LT_EXPR
, boolean_type_node
, t1
, t2
));
179 if (TREE_CONSTANT (cond
))
181 if (integer_zerop (cond
))
188 basic_block bb_cond
, bb_true
, bb_false
, bb_join
;
192 l1
= create_artificial_label ();
193 t1
= build (GOTO_EXPR
, void_type_node
, l1
);
194 l2
= create_artificial_label ();
195 t2
= build (GOTO_EXPR
, void_type_node
, l2
);
196 cond
= build (COND_EXPR
, void_type_node
, cond
, t1
, t2
);
197 bsi_insert_before (bsi
, cond
, BSI_SAME_STMT
);
199 min
= make_rename_temp (inner_type
, NULL
);
200 max
= make_rename_temp (inner_type
, NULL
);
201 l3
= create_artificial_label ();
203 /* Split the original block, and create the TRUE and FALSE blocks. */
204 e
= split_block (bsi
->bb
, cond
);
207 bb_true
= create_empty_bb (bb_cond
);
208 bb_false
= create_empty_bb (bb_true
);
210 /* Wire the blocks together. */
211 e
->flags
= EDGE_TRUE_VALUE
;
212 redirect_edge_succ (e
, bb_true
);
213 make_edge (bb_cond
, bb_false
, EDGE_FALSE_VALUE
);
214 make_edge (bb_true
, bb_join
, 0);
215 make_edge (bb_false
, bb_join
, 0);
217 /* Update dominance info. Note that bb_join's data was
218 updated by split_block. */
219 if (dom_computed
[CDI_DOMINATORS
] >= DOM_CONS_OK
)
221 set_immediate_dominator (CDI_DOMINATORS
, bb_true
, bb_cond
);
222 set_immediate_dominator (CDI_DOMINATORS
, bb_false
, bb_cond
);
225 /* Compute min and max for TRUE block. */
226 *bsi
= bsi_start (bb_true
);
227 t1
= build (LABEL_EXPR
, void_type_node
, l1
);
228 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
229 t1
= build (MODIFY_EXPR
, inner_type
, min
, br
);
230 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
231 t1
= build (MODIFY_EXPR
, inner_type
, max
, bi
);
232 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
234 /* Compute min and max for FALSE block. */
235 *bsi
= bsi_start (bb_false
);
236 t1
= build (LABEL_EXPR
, void_type_node
, l2
);
237 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
238 t1
= build (MODIFY_EXPR
, inner_type
, min
, bi
);
239 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
240 t1
= build (MODIFY_EXPR
, inner_type
, max
, br
);
241 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
243 /* Insert the join label into the tail of the original block. */
244 *bsi
= bsi_start (bb_join
);
245 t1
= build (LABEL_EXPR
, void_type_node
, l3
);
246 bsi_insert_before (bsi
, t1
, BSI_SAME_STMT
);
249 /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|). We now use the
250 ratio min/max to scale both the dividend and divisor. */
251 ratio
= gimplify_build2 (bsi
, code
, inner_type
, min
, max
);
253 /* Calculate the divisor: min*ratio + max. */
254 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, min
, ratio
);
255 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, max
);
257 /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div). */
258 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, ratio
);
259 t2
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, ar
, t1
);
260 rr
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
262 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, ratio
);
263 t2
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, ai
, t1
);
264 ri
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
266 update_complex_assignment (bsi
, rr
, ri
);
269 /* Expand complex division to scalars. */
272 expand_complex_division (block_stmt_iterator
*bsi
, tree inner_type
,
273 tree ar
, tree ai
, tree br
, tree bi
,
276 switch (flag_complex_divide_method
)
279 /* straightforward implementation of complex divide acceptable. */
280 expand_complex_div_straight (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
283 /* wide ranges of inputs must work for complex divide. */
284 expand_complex_div_wide (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
287 /* C99-like requirements for complex divide (not yet implemented). */
292 /* Expand complex negation to scalars:
297 expand_complex_negation (block_stmt_iterator
*bsi
, tree inner_type
,
302 rr
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ar
);
303 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
305 update_complex_assignment (bsi
, rr
, ri
);
308 /* Expand complex conjugate to scalars:
313 expand_complex_conjugate (block_stmt_iterator
*bsi
, tree inner_type
,
318 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
320 update_complex_assignment (bsi
, ar
, ri
);
323 /* Expand complex comparison (EQ or NE only). */
326 expand_complex_comparison (block_stmt_iterator
*bsi
, tree ar
, tree ai
,
327 tree br
, tree bi
, enum tree_code code
)
329 tree cr
, ci
, cc
, stmt
, type
;
331 cr
= gimplify_build2 (bsi
, code
, boolean_type_node
, ar
, br
);
332 ci
= gimplify_build2 (bsi
, code
, boolean_type_node
, ai
, bi
);
333 cc
= gimplify_build2 (bsi
,
334 (code
== EQ_EXPR
? TRUTH_AND_EXPR
: TRUTH_OR_EXPR
),
335 boolean_type_node
, cr
, ci
);
337 stmt
= bsi_stmt (*bsi
);
340 switch (TREE_CODE (stmt
))
343 stmt
= TREE_OPERAND (stmt
, 0);
346 type
= TREE_TYPE (TREE_OPERAND (stmt
, 1));
347 TREE_OPERAND (stmt
, 1) = fold_convert (type
, cc
);
350 TREE_OPERAND (stmt
, 0) = cc
;
357 /* Process one statement. If we identify a complex operation, expand it. */
360 expand_complex_operations_1 (block_stmt_iterator
*bsi
)
362 tree stmt
= bsi_stmt (*bsi
);
363 tree rhs
, type
, inner_type
;
364 tree ac
, ar
, ai
, bc
, br
, bi
;
367 switch (TREE_CODE (stmt
))
370 stmt
= TREE_OPERAND (stmt
, 0);
373 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
378 rhs
= TREE_OPERAND (stmt
, 1);
382 rhs
= TREE_OPERAND (stmt
, 0);
389 type
= TREE_TYPE (rhs
);
390 code
= TREE_CODE (rhs
);
392 /* Initial filter for operations we handle. */
405 if (TREE_CODE (type
) != COMPLEX_TYPE
)
407 inner_type
= TREE_TYPE (type
);
412 inner_type
= TREE_TYPE (TREE_OPERAND (rhs
, 1));
413 if (TREE_CODE (inner_type
) != COMPLEX_TYPE
)
421 /* Extract the components of the two complex values. Make sure and
422 handle the common case of the same value used twice specially. */
423 ac
= TREE_OPERAND (rhs
, 0);
424 ar
= extract_component (bsi
, ac
, 0);
425 ai
= extract_component (bsi
, ac
, 1);
427 if (TREE_CODE_CLASS (code
) == '1')
431 bc
= TREE_OPERAND (rhs
, 1);
436 br
= extract_component (bsi
, bc
, 0);
437 bi
= extract_component (bsi
, bc
, 1);
445 expand_complex_addition (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
449 expand_complex_multiplication (bsi
, inner_type
, ar
, ai
, br
, bi
);
457 expand_complex_division (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
461 expand_complex_negation (bsi
, inner_type
, ar
, ai
);
465 expand_complex_conjugate (bsi
, inner_type
, ar
, ai
);
470 expand_complex_comparison (bsi
, ar
, ai
, br
, bi
, code
);
478 /* Build a constant of type TYPE, made of VALUE's bits replicated
479 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
481 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
483 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
484 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
485 unsigned HOST_WIDE_INT low
, high
, mask
;
491 if (width
== HOST_BITS_PER_WIDE_INT
)
495 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
496 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
499 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
500 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
501 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
503 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
508 ret
= build_int_2 (low
, high
);
509 TREE_TYPE (ret
) = type
;
513 /* Return a suitable vector types made of SUBPARTS units each of mode
514 "word_mode" (the global variable). */
516 build_word_mode_vector_type (int nunits
)
518 static tree innertype
;
520 static int last_nunits
;
523 innertype
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
524 else if (last_nunits
== nunits
)
527 /* We build a new type, but we canonicalize it nevertheless,
528 because it still saves some memory. */
529 last_nunits
= nunits
;
530 last
= type_hash_canon (nunits
, build_vector_type (innertype
, nunits
));
534 typedef tree (*elem_op_func
) (block_stmt_iterator
*,
535 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
538 tree_vec_extract (block_stmt_iterator
*bsi
, tree type
,
539 tree t
, tree bitsize
, tree bitpos
)
542 return gimplify_build3 (bsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
544 return gimplify_build1 (bsi
, VIEW_CONVERT_EXPR
, type
, t
);
548 do_unop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
,
549 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
552 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
553 return gimplify_build1 (bsi
, code
, inner_type
, a
);
557 do_binop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
, tree b
,
558 tree bitpos
, tree bitsize
, enum tree_code code
)
560 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
561 b
= tree_vec_extract (bsi
, inner_type
, b
, bitsize
, bitpos
);
562 return gimplify_build2 (bsi
, code
, inner_type
, a
, b
);
565 /* Expand vector addition to scalars. This does bit twiddling
566 in order to increase parallelism:
568 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
571 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
572 (a ^ ~b) & 0x80808080
574 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
576 This optimization should be done only if 4 vector items or more
579 do_plus_minus (block_stmt_iterator
*bsi
, tree word_type
, tree a
, tree b
,
580 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
583 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
584 unsigned HOST_WIDE_INT max
;
585 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
587 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
588 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
589 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
591 a
= tree_vec_extract (bsi
, word_type
, a
, bitsize
, bitpos
);
592 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
594 signs
= gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
595 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
596 if (code
== PLUS_EXPR
)
597 a_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
600 a_low
= gimplify_build2 (bsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
601 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, signs
);
604 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
605 result_low
= gimplify_build2 (bsi
, code
, word_type
, a_low
, b_low
);
606 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
610 do_negate (block_stmt_iterator
*bsi
, tree word_type
, tree b
,
611 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
612 tree bitsize ATTRIBUTE_UNUSED
,
613 enum tree_code code ATTRIBUTE_UNUSED
)
615 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
617 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
619 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
620 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
621 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
623 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
625 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
626 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, b
);
627 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
628 result_low
= gimplify_build2 (bsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
629 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
632 /* Expand a vector operation to scalars, by using many operations
633 whose type is the vector type's inner type. */
635 expand_vector_piecewise (block_stmt_iterator
*bsi
, elem_op_func f
,
636 tree type
, tree inner_type
,
637 tree a
, tree b
, enum tree_code code
)
639 tree head
, *chain
= &head
;
640 tree part_width
= TYPE_SIZE (inner_type
);
641 tree index
= bitsize_int (0);
642 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
643 int delta
= tree_low_cst (part_width
, 1)
644 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
647 for (i
= 0; i
< nunits
;
648 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
, 0))
650 tree result
= f (bsi
, inner_type
, a
, b
, index
, part_width
, code
);
651 *chain
= tree_cons (NULL_TREE
, result
, NULL_TREE
);
652 chain
= &TREE_CHAIN (*chain
);
655 return build1 (CONSTRUCTOR
, type
, head
);
658 /* Expand a vector operation to scalars with the freedom to use
659 a scalar integer type, or to use a different size for the items
660 in the vector type. */
662 expand_vector_parallel (block_stmt_iterator
*bsi
, elem_op_func f
, tree type
,
666 tree result
, compute_type
;
667 enum machine_mode mode
;
668 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
670 /* We have three strategies. If the type is already correct, just do
671 the operation an element at a time. Else, if the vector is wider than
672 one word, do it a word at a time; finally, if the vector is smaller
673 than one word, do it as a scalar. */
674 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
675 return expand_vector_piecewise (bsi
, f
,
676 type
, TREE_TYPE (type
),
678 else if (n_words
> 1)
680 tree word_type
= build_word_mode_vector_type (n_words
);
681 result
= expand_vector_piecewise (bsi
, f
,
682 word_type
, TREE_TYPE (word_type
),
684 result
= gimplify_val (bsi
, word_type
, result
);
688 /* Use a single scalar operation with a mode no wider than word_mode. */
689 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
690 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
691 result
= f (bsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
694 return build1 (VIEW_CONVERT_EXPR
, type
, result
);
697 /* Expand a vector operation to scalars; for integer types we can use
698 special bit twiddling tricks to do the sums a word at a time, using
699 function F_PARALLEL instead of F. These tricks are done only if
700 they can process at least four items, that is, only if the vector
701 holds at least four items and if a word can hold four items. */
703 expand_vector_addition (block_stmt_iterator
*bsi
,
704 elem_op_func f
, elem_op_func f_parallel
,
705 tree type
, tree a
, tree b
, enum tree_code code
)
707 int parts_per_word
= UNITS_PER_WORD
708 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
710 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
711 && parts_per_word
>= 4
712 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
713 return expand_vector_parallel (bsi
, f_parallel
,
716 return expand_vector_piecewise (bsi
, f
,
717 type
, TREE_TYPE (type
),
721 /* Return a type for the widest vector mode whose components are of mode
722 INNER_MODE, or NULL_TREE if none is found. */
724 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
)
726 enum machine_mode best_mode
= VOIDmode
, mode
;
729 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
730 mode
= MIN_MODE_VECTOR_FLOAT
;
732 mode
= MIN_MODE_VECTOR_INT
;
734 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
735 if (GET_MODE_INNER (mode
) == inner_mode
736 && GET_MODE_NUNITS (mode
) > best_nunits
737 && op
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
738 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
740 if (best_mode
== VOIDmode
)
743 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
746 /* Process one statement. If we identify a vector operation, expand it. */
749 expand_vector_operations_1 (block_stmt_iterator
*bsi
)
751 tree stmt
= bsi_stmt (*bsi
);
752 tree
*p_rhs
, rhs
, type
, compute_type
;
754 enum machine_mode compute_mode
;
757 switch (TREE_CODE (stmt
))
760 stmt
= TREE_OPERAND (stmt
, 0);
761 if (!stmt
|| TREE_CODE (stmt
) != MODIFY_EXPR
)
767 p_rhs
= &TREE_OPERAND (stmt
, 1);
775 type
= TREE_TYPE (rhs
);
776 if (TREE_CODE (type
) != VECTOR_TYPE
)
779 code
= TREE_CODE (rhs
);
780 if (TREE_CODE_CLASS (code
) != '1'
781 && TREE_CODE_CLASS (code
) != '2')
784 if (code
== NOP_EXPR
|| code
== VIEW_CONVERT_EXPR
)
787 if (code
== CONVERT_EXPR
)
790 op
= optab_for_tree_code (code
, type
);
792 /* Optabs will try converting a negation into a subtraction, so
793 look for it as well. TODO: negation of floating-point vectors
794 might be turned into an exclusive OR toggling the sign bit. */
796 && code
== NEGATE_EXPR
797 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
798 op
= optab_for_tree_code (MINUS_EXPR
, type
);
800 /* For very wide vectors, try using a smaller vector mode. */
802 if (TYPE_MODE (type
) == BLKmode
&& op
)
804 tree vector_compute_type
805 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
);
806 if (vector_compute_type
!= NULL_TREE
)
807 compute_type
= vector_compute_type
;
810 compute_mode
= TYPE_MODE (compute_type
);
812 /* If we are breaking a BLKmode vector into smaller pieces,
813 type_for_widest_vector_mode has already looked into the optab,
814 so skip these checks. */
815 if (compute_type
== type
)
817 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
818 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
)
820 && op
->handlers
[compute_mode
].insn_code
!= CODE_FOR_nothing
)
824 /* There is no operation in hardware, so fall back to scalars. */
825 compute_type
= TREE_TYPE (type
);
826 compute_mode
= TYPE_MODE (compute_type
);
830 /* If the compute mode is not a vector mode (hence we are decomposing
831 a BLKmode vector to smaller, hardware-supported vectors), we may
832 want to expand the operations in parallel. */
833 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
834 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
)
839 if (TYPE_TRAP_SIGNED (type
))
842 *p_rhs
= expand_vector_addition (bsi
, do_binop
, do_plus_minus
, type
,
843 TREE_OPERAND (rhs
, 0),
844 TREE_OPERAND (rhs
, 1), code
);
845 modify_stmt (bsi_stmt (*bsi
));
849 if (TYPE_TRAP_SIGNED (type
))
852 *p_rhs
= expand_vector_addition (bsi
, do_unop
, do_negate
, type
,
853 TREE_OPERAND (rhs
, 0),
855 modify_stmt (bsi_stmt (*bsi
));
861 *p_rhs
= expand_vector_parallel (bsi
, do_binop
, type
,
862 TREE_OPERAND (rhs
, 0),
863 TREE_OPERAND (rhs
, 1), code
);
864 modify_stmt (bsi_stmt (*bsi
));
868 *p_rhs
= expand_vector_parallel (bsi
, do_unop
, type
,
869 TREE_OPERAND (rhs
, 0),
871 modify_stmt (bsi_stmt (*bsi
));
878 if (TREE_CODE_CLASS (code
) == '1')
879 *p_rhs
= expand_vector_piecewise (bsi
, do_unop
, type
, compute_type
,
880 TREE_OPERAND (rhs
, 0),
883 *p_rhs
= expand_vector_piecewise (bsi
, do_binop
, type
, compute_type
,
884 TREE_OPERAND (rhs
, 0),
885 TREE_OPERAND (rhs
, 1), code
);
887 modify_stmt (bsi_stmt (*bsi
));
891 expand_vector_operations (void)
893 block_stmt_iterator bsi
;
898 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
899 expand_vector_operations_1 (&bsi
);
904 tree_lower_operations (void)
906 int old_last_basic_block
= last_basic_block
;
907 block_stmt_iterator bsi
;
912 if (bb
->index
>= old_last_basic_block
)
914 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
916 expand_complex_operations_1 (&bsi
);
917 expand_vector_operations_1 (&bsi
);
923 struct tree_opt_pass pass_lower_vector_ssa
=
927 expand_vector_operations
, /* execute */
930 0, /* static_pass_number */
932 PROP_cfg
, /* properties_required */
933 0, /* properties_provided */
934 0, /* properties_destroyed */
935 0, /* todo_flags_start */
936 TODO_dump_func
| TODO_rename_vars
/* todo_flags_finish */
937 | TODO_ggc_collect
| TODO_verify_ssa
938 | TODO_verify_stmts
| TODO_verify_flow
941 struct tree_opt_pass pass_pre_expand
=
943 "oplower", /* name */
945 tree_lower_operations
, /* execute */
948 0, /* static_pass_number */
950 PROP_cfg
, /* properties_required */
951 0, /* properties_provided */
952 0, /* properties_destroyed */
953 0, /* todo_flags_start */
954 TODO_dump_func
| TODO_ggc_collect
955 | TODO_verify_stmts
/* todo_flags_finish */