1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
40 /* Extract the real or imaginary part of a complex variable or constant.
41 Make sure that it's a proper gimple_val and gimplify it if not.
42 Emit any new code before BSI. */
45 extract_component (block_stmt_iterator
*bsi
, tree t
, bool imagpart_p
)
49 inner_type
= TREE_TYPE (TREE_TYPE (t
));
50 switch (TREE_CODE (t
))
53 ret
= (imagpart_p
? TREE_IMAGPART (t
) : TREE_REALPART (t
));
57 ret
= TREE_OPERAND (t
, imagpart_p
);
62 ret
= build1 ((imagpart_p
? IMAGPART_EXPR
: REALPART_EXPR
),
70 return gimplify_val (bsi
, inner_type
, ret
);
73 /* Update an assignment to a complex variable in place. */
76 update_complex_assignment (block_stmt_iterator
*bsi
, tree r
, tree i
)
78 tree stmt
= bsi_stmt (*bsi
);
81 if (TREE_CODE (stmt
) == RETURN_EXPR
)
82 stmt
= TREE_OPERAND (stmt
, 0);
84 type
= TREE_TYPE (TREE_OPERAND (stmt
, 1));
85 TREE_OPERAND (stmt
, 1) = build (COMPLEX_EXPR
, type
, r
, i
);
89 /* Expand complex addition to scalars:
90 a + b = (ar + br) + i(ai + bi)
91 a - b = (ar - br) + i(ai + bi)
95 expand_complex_addition (block_stmt_iterator
*bsi
, tree inner_type
,
96 tree ar
, tree ai
, tree br
, tree bi
,
101 rr
= gimplify_build2 (bsi
, code
, inner_type
, ar
, br
);
102 ri
= gimplify_build2 (bsi
, code
, inner_type
, ai
, bi
);
104 update_complex_assignment (bsi
, rr
, ri
);
107 /* Expand complex multiplication to scalars:
108 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
112 expand_complex_multiplication (block_stmt_iterator
*bsi
, tree inner_type
,
113 tree ar
, tree ai
, tree br
, tree bi
)
115 tree t1
, t2
, t3
, t4
, rr
, ri
;
117 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
118 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
119 t3
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
121 /* Avoid expanding redundant multiplication for the common
122 case of squaring a complex number. */
123 if (ar
== br
&& ai
== bi
)
126 t4
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
128 rr
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
129 ri
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t3
, t4
);
131 update_complex_assignment (bsi
, rr
, ri
);
134 /* Expand complex division to scalars, straightforward algorithm.
135 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
140 expand_complex_div_straight (block_stmt_iterator
*bsi
, tree inner_type
,
141 tree ar
, tree ai
, tree br
, tree bi
,
144 tree rr
, ri
, div
, t1
, t2
, t3
;
146 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, br
, br
);
147 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, bi
, bi
);
148 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
150 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
151 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
152 t3
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
153 rr
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
155 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
156 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
157 t3
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
158 ri
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
160 update_complex_assignment (bsi
, rr
, ri
);
163 /* Expand complex division to scalars, modified algorithm to minimize
164 overflow with wide input ranges. */
167 expand_complex_div_wide (block_stmt_iterator
*bsi
, tree inner_type
,
168 tree ar
, tree ai
, tree br
, tree bi
,
171 tree rr
, ri
, ratio
, div
, t1
, t2
, min
, max
, cond
;
173 /* Examine |br| < |bi|, and branch. */
174 t1
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, br
);
175 t2
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, bi
);
176 cond
= fold (build (LT_EXPR
, boolean_type_node
, t1
, t2
));
179 if (TREE_CONSTANT (cond
))
181 if (integer_zerop (cond
))
188 basic_block bb_cond
, bb_true
, bb_false
, bb_join
;
192 l1
= create_artificial_label ();
193 t1
= build (GOTO_EXPR
, void_type_node
, l1
);
194 l2
= create_artificial_label ();
195 t2
= build (GOTO_EXPR
, void_type_node
, l2
);
196 cond
= build (COND_EXPR
, void_type_node
, cond
, t1
, t2
);
197 bsi_insert_before (bsi
, cond
, BSI_SAME_STMT
);
199 min
= make_rename_temp (inner_type
, NULL
);
200 max
= make_rename_temp (inner_type
, NULL
);
201 l3
= create_artificial_label ();
203 /* Split the original block, and create the TRUE and FALSE blocks. */
204 e
= split_block (bsi
->bb
, cond
);
207 bb_true
= create_empty_bb (bb_cond
);
208 bb_false
= create_empty_bb (bb_true
);
210 /* Wire the blocks together. */
211 e
->flags
= EDGE_TRUE_VALUE
;
212 redirect_edge_succ (e
, bb_true
);
213 make_edge (bb_cond
, bb_false
, EDGE_FALSE_VALUE
);
214 make_edge (bb_true
, bb_join
, 0);
215 make_edge (bb_false
, bb_join
, 0);
217 /* Update dominance info. Note that bb_join's data was
218 updated by split_block. */
219 if (dom_computed
[CDI_DOMINATORS
] >= DOM_CONS_OK
)
221 set_immediate_dominator (CDI_DOMINATORS
, bb_true
, bb_cond
);
222 set_immediate_dominator (CDI_DOMINATORS
, bb_false
, bb_cond
);
225 /* Compute min and max for TRUE block. */
226 *bsi
= bsi_start (bb_true
);
227 t1
= build (LABEL_EXPR
, void_type_node
, l1
);
228 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
229 t1
= build (MODIFY_EXPR
, inner_type
, min
, br
);
230 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
231 t1
= build (MODIFY_EXPR
, inner_type
, max
, bi
);
232 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
234 /* Compute min and max for FALSE block. */
235 *bsi
= bsi_start (bb_false
);
236 t1
= build (LABEL_EXPR
, void_type_node
, l2
);
237 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
238 t1
= build (MODIFY_EXPR
, inner_type
, min
, bi
);
239 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
240 t1
= build (MODIFY_EXPR
, inner_type
, max
, br
);
241 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
243 /* Insert the join label into the tail of the original block. */
244 *bsi
= bsi_start (bb_join
);
245 t1
= build (LABEL_EXPR
, void_type_node
, l3
);
246 bsi_insert_before (bsi
, t1
, BSI_SAME_STMT
);
249 /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|). We now use the
250 ratio min/max to scale both the dividend and divisor. */
251 ratio
= gimplify_build2 (bsi
, code
, inner_type
, min
, max
);
253 /* Calculate the divisor: min*ratio + max. */
254 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, min
, ratio
);
255 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, max
);
257 /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div). */
258 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, ratio
);
259 t2
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, ar
, t1
);
260 rr
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
262 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, ratio
);
263 t2
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, ai
, t1
);
264 ri
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
266 update_complex_assignment (bsi
, rr
, ri
);
269 /* Expand complex division to scalars. */
272 expand_complex_division (block_stmt_iterator
*bsi
, tree inner_type
,
273 tree ar
, tree ai
, tree br
, tree bi
,
276 switch (flag_complex_divide_method
)
279 /* straightforward implementation of complex divide acceptable. */
280 expand_complex_div_straight (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
283 /* wide ranges of inputs must work for complex divide. */
284 expand_complex_div_wide (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
287 /* C99-like requirements for complex divide (not yet implemented). */
292 /* Expand complex negation to scalars:
297 expand_complex_negation (block_stmt_iterator
*bsi
, tree inner_type
,
302 rr
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ar
);
303 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
305 update_complex_assignment (bsi
, rr
, ri
);
308 /* Expand complex conjugate to scalars:
313 expand_complex_conjugate (block_stmt_iterator
*bsi
, tree inner_type
,
318 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
320 update_complex_assignment (bsi
, ar
, ri
);
323 /* Expand complex comparison (EQ or NE only). */
326 expand_complex_comparison (block_stmt_iterator
*bsi
, tree ar
, tree ai
,
327 tree br
, tree bi
, enum tree_code code
)
329 tree cr
, ci
, cc
, stmt
, expr
, type
;
331 cr
= gimplify_build2 (bsi
, code
, boolean_type_node
, ar
, br
);
332 ci
= gimplify_build2 (bsi
, code
, boolean_type_node
, ai
, bi
);
333 cc
= gimplify_build2 (bsi
,
334 (code
== EQ_EXPR
? TRUTH_AND_EXPR
: TRUTH_OR_EXPR
),
335 boolean_type_node
, cr
, ci
);
337 stmt
= expr
= bsi_stmt (*bsi
);
339 switch (TREE_CODE (stmt
))
342 expr
= TREE_OPERAND (stmt
, 0);
345 type
= TREE_TYPE (TREE_OPERAND (expr
, 1));
346 TREE_OPERAND (expr
, 1) = fold_convert (type
, cc
);
349 TREE_OPERAND (stmt
, 0) = cc
;
358 /* Process one statement. If we identify a complex operation, expand it. */
361 expand_complex_operations_1 (block_stmt_iterator
*bsi
)
363 tree stmt
= bsi_stmt (*bsi
);
364 tree rhs
, type
, inner_type
;
365 tree ac
, ar
, ai
, bc
, br
, bi
;
368 switch (TREE_CODE (stmt
))
371 stmt
= TREE_OPERAND (stmt
, 0);
374 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
379 rhs
= TREE_OPERAND (stmt
, 1);
383 rhs
= TREE_OPERAND (stmt
, 0);
390 type
= TREE_TYPE (rhs
);
391 code
= TREE_CODE (rhs
);
393 /* Initial filter for operations we handle. */
406 if (TREE_CODE (type
) != COMPLEX_TYPE
)
408 inner_type
= TREE_TYPE (type
);
413 inner_type
= TREE_TYPE (TREE_OPERAND (rhs
, 1));
414 if (TREE_CODE (inner_type
) != COMPLEX_TYPE
)
422 /* Extract the components of the two complex values. Make sure and
423 handle the common case of the same value used twice specially. */
424 ac
= TREE_OPERAND (rhs
, 0);
425 ar
= extract_component (bsi
, ac
, 0);
426 ai
= extract_component (bsi
, ac
, 1);
428 if (TREE_CODE_CLASS (code
) == tcc_unary
)
432 bc
= TREE_OPERAND (rhs
, 1);
437 br
= extract_component (bsi
, bc
, 0);
438 bi
= extract_component (bsi
, bc
, 1);
446 expand_complex_addition (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
450 expand_complex_multiplication (bsi
, inner_type
, ar
, ai
, br
, bi
);
458 expand_complex_division (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
462 expand_complex_negation (bsi
, inner_type
, ar
, ai
);
466 expand_complex_conjugate (bsi
, inner_type
, ar
, ai
);
471 expand_complex_comparison (bsi
, ar
, ai
, br
, bi
, code
);
479 /* Build a constant of type TYPE, made of VALUE's bits replicated
480 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
482 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
484 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
485 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
486 unsigned HOST_WIDE_INT low
, high
, mask
;
491 if (width
== HOST_BITS_PER_WIDE_INT
)
495 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
496 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
499 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
500 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
501 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
503 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
508 ret
= build_int_cst_wide (type
, low
, high
);
512 /* Return a suitable vector types made of SUBPARTS units each of mode
513 "word_mode" (the global variable). */
515 build_word_mode_vector_type (int nunits
)
517 static tree innertype
;
519 static int last_nunits
;
522 innertype
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
523 else if (last_nunits
== nunits
)
526 /* We build a new type, but we canonicalize it nevertheless,
527 because it still saves some memory. */
528 last_nunits
= nunits
;
529 last
= type_hash_canon (nunits
, build_vector_type (innertype
, nunits
));
533 typedef tree (*elem_op_func
) (block_stmt_iterator
*,
534 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
537 tree_vec_extract (block_stmt_iterator
*bsi
, tree type
,
538 tree t
, tree bitsize
, tree bitpos
)
541 return gimplify_build3 (bsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
543 return gimplify_build1 (bsi
, VIEW_CONVERT_EXPR
, type
, t
);
547 do_unop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
,
548 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
551 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
552 return gimplify_build1 (bsi
, code
, inner_type
, a
);
556 do_binop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
, tree b
,
557 tree bitpos
, tree bitsize
, enum tree_code code
)
559 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
560 b
= tree_vec_extract (bsi
, inner_type
, b
, bitsize
, bitpos
);
561 return gimplify_build2 (bsi
, code
, inner_type
, a
, b
);
564 /* Expand vector addition to scalars. This does bit twiddling
565 in order to increase parallelism:
567 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
570 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
571 (a ^ ~b) & 0x80808080
573 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
575 This optimization should be done only if 4 vector items or more
578 do_plus_minus (block_stmt_iterator
*bsi
, tree word_type
, tree a
, tree b
,
579 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
582 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
583 unsigned HOST_WIDE_INT max
;
584 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
586 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
587 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
588 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
590 a
= tree_vec_extract (bsi
, word_type
, a
, bitsize
, bitpos
);
591 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
593 signs
= gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
594 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
595 if (code
== PLUS_EXPR
)
596 a_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
599 a_low
= gimplify_build2 (bsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
600 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, signs
);
603 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
604 result_low
= gimplify_build2 (bsi
, code
, word_type
, a_low
, b_low
);
605 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
609 do_negate (block_stmt_iterator
*bsi
, tree word_type
, tree b
,
610 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
611 tree bitsize ATTRIBUTE_UNUSED
,
612 enum tree_code code ATTRIBUTE_UNUSED
)
614 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
616 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
618 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
619 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
620 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
622 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
624 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
625 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, b
);
626 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
627 result_low
= gimplify_build2 (bsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
628 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
631 /* Expand a vector operation to scalars, by using many operations
632 whose type is the vector type's inner type. */
634 expand_vector_piecewise (block_stmt_iterator
*bsi
, elem_op_func f
,
635 tree type
, tree inner_type
,
636 tree a
, tree b
, enum tree_code code
)
638 tree head
, *chain
= &head
;
639 tree part_width
= TYPE_SIZE (inner_type
);
640 tree index
= bitsize_int (0);
641 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
642 int delta
= tree_low_cst (part_width
, 1)
643 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
646 for (i
= 0; i
< nunits
;
647 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
, 0))
649 tree result
= f (bsi
, inner_type
, a
, b
, index
, part_width
, code
);
650 *chain
= tree_cons (NULL_TREE
, result
, NULL_TREE
);
651 chain
= &TREE_CHAIN (*chain
);
654 return build1 (CONSTRUCTOR
, type
, head
);
657 /* Expand a vector operation to scalars with the freedom to use
658 a scalar integer type, or to use a different size for the items
659 in the vector type. */
661 expand_vector_parallel (block_stmt_iterator
*bsi
, elem_op_func f
, tree type
,
665 tree result
, compute_type
;
666 enum machine_mode mode
;
667 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
669 /* We have three strategies. If the type is already correct, just do
670 the operation an element at a time. Else, if the vector is wider than
671 one word, do it a word at a time; finally, if the vector is smaller
672 than one word, do it as a scalar. */
673 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
674 return expand_vector_piecewise (bsi
, f
,
675 type
, TREE_TYPE (type
),
677 else if (n_words
> 1)
679 tree word_type
= build_word_mode_vector_type (n_words
);
680 result
= expand_vector_piecewise (bsi
, f
,
681 word_type
, TREE_TYPE (word_type
),
683 result
= gimplify_val (bsi
, word_type
, result
);
687 /* Use a single scalar operation with a mode no wider than word_mode. */
688 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
689 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
690 result
= f (bsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
693 return build1 (VIEW_CONVERT_EXPR
, type
, result
);
696 /* Expand a vector operation to scalars; for integer types we can use
697 special bit twiddling tricks to do the sums a word at a time, using
698 function F_PARALLEL instead of F. These tricks are done only if
699 they can process at least four items, that is, only if the vector
700 holds at least four items and if a word can hold four items. */
702 expand_vector_addition (block_stmt_iterator
*bsi
,
703 elem_op_func f
, elem_op_func f_parallel
,
704 tree type
, tree a
, tree b
, enum tree_code code
)
706 int parts_per_word
= UNITS_PER_WORD
707 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
709 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
710 && parts_per_word
>= 4
711 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
712 return expand_vector_parallel (bsi
, f_parallel
,
715 return expand_vector_piecewise (bsi
, f
,
716 type
, TREE_TYPE (type
),
720 /* Return a type for the widest vector mode whose components are of mode
721 INNER_MODE, or NULL_TREE if none is found. */
723 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
)
725 enum machine_mode best_mode
= VOIDmode
, mode
;
728 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
729 mode
= MIN_MODE_VECTOR_FLOAT
;
731 mode
= MIN_MODE_VECTOR_INT
;
733 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
734 if (GET_MODE_INNER (mode
) == inner_mode
735 && GET_MODE_NUNITS (mode
) > best_nunits
736 && op
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
737 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
739 if (best_mode
== VOIDmode
)
742 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
745 /* Process one statement. If we identify a vector operation, expand it. */
748 expand_vector_operations_1 (block_stmt_iterator
*bsi
)
750 tree stmt
= bsi_stmt (*bsi
);
751 tree
*p_rhs
, rhs
, type
, compute_type
;
753 enum machine_mode compute_mode
;
756 switch (TREE_CODE (stmt
))
759 stmt
= TREE_OPERAND (stmt
, 0);
760 if (!stmt
|| TREE_CODE (stmt
) != MODIFY_EXPR
)
766 p_rhs
= &TREE_OPERAND (stmt
, 1);
774 type
= TREE_TYPE (rhs
);
775 if (TREE_CODE (type
) != VECTOR_TYPE
)
778 code
= TREE_CODE (rhs
);
779 if (TREE_CODE_CLASS (code
) != tcc_unary
780 && TREE_CODE_CLASS (code
) != tcc_binary
)
783 if (code
== NOP_EXPR
|| code
== VIEW_CONVERT_EXPR
)
786 gcc_assert (code
!= CONVERT_EXPR
);
787 op
= optab_for_tree_code (code
, type
);
789 /* Optabs will try converting a negation into a subtraction, so
790 look for it as well. TODO: negation of floating-point vectors
791 might be turned into an exclusive OR toggling the sign bit. */
793 && code
== NEGATE_EXPR
794 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
795 op
= optab_for_tree_code (MINUS_EXPR
, type
);
797 /* For very wide vectors, try using a smaller vector mode. */
799 if (TYPE_MODE (type
) == BLKmode
&& op
)
801 tree vector_compute_type
802 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
);
803 if (vector_compute_type
!= NULL_TREE
)
804 compute_type
= vector_compute_type
;
807 compute_mode
= TYPE_MODE (compute_type
);
809 /* If we are breaking a BLKmode vector into smaller pieces,
810 type_for_widest_vector_mode has already looked into the optab,
811 so skip these checks. */
812 if (compute_type
== type
)
814 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
815 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
)
817 && op
->handlers
[compute_mode
].insn_code
!= CODE_FOR_nothing
)
821 /* There is no operation in hardware, so fall back to scalars. */
822 compute_type
= TREE_TYPE (type
);
823 compute_mode
= TYPE_MODE (compute_type
);
827 /* If the compute mode is not a vector mode (hence we are decomposing
828 a BLKmode vector to smaller, hardware-supported vectors), we may
829 want to expand the operations in parallel. */
830 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
831 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
)
836 if (TYPE_TRAP_SIGNED (type
))
839 *p_rhs
= expand_vector_addition (bsi
, do_binop
, do_plus_minus
, type
,
840 TREE_OPERAND (rhs
, 0),
841 TREE_OPERAND (rhs
, 1), code
);
842 modify_stmt (bsi_stmt (*bsi
));
846 if (TYPE_TRAP_SIGNED (type
))
849 *p_rhs
= expand_vector_addition (bsi
, do_unop
, do_negate
, type
,
850 TREE_OPERAND (rhs
, 0),
852 modify_stmt (bsi_stmt (*bsi
));
858 *p_rhs
= expand_vector_parallel (bsi
, do_binop
, type
,
859 TREE_OPERAND (rhs
, 0),
860 TREE_OPERAND (rhs
, 1), code
);
861 modify_stmt (bsi_stmt (*bsi
));
865 *p_rhs
= expand_vector_parallel (bsi
, do_unop
, type
,
866 TREE_OPERAND (rhs
, 0),
868 modify_stmt (bsi_stmt (*bsi
));
875 if (TREE_CODE_CLASS (code
) == tcc_unary
)
876 *p_rhs
= expand_vector_piecewise (bsi
, do_unop
, type
, compute_type
,
877 TREE_OPERAND (rhs
, 0),
880 *p_rhs
= expand_vector_piecewise (bsi
, do_binop
, type
, compute_type
,
881 TREE_OPERAND (rhs
, 0),
882 TREE_OPERAND (rhs
, 1), code
);
884 modify_stmt (bsi_stmt (*bsi
));
888 expand_vector_operations (void)
890 block_stmt_iterator bsi
;
895 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
896 expand_vector_operations_1 (&bsi
);
901 tree_lower_operations (void)
903 int old_last_basic_block
= last_basic_block
;
904 block_stmt_iterator bsi
;
909 if (bb
->index
>= old_last_basic_block
)
911 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
913 expand_complex_operations_1 (&bsi
);
914 expand_vector_operations_1 (&bsi
);
920 struct tree_opt_pass pass_lower_vector_ssa
=
924 expand_vector_operations
, /* execute */
927 0, /* static_pass_number */
929 PROP_cfg
, /* properties_required */
930 0, /* properties_provided */
931 0, /* properties_destroyed */
932 0, /* todo_flags_start */
933 TODO_dump_func
| TODO_rename_vars
/* todo_flags_finish */
934 | TODO_ggc_collect
| TODO_verify_ssa
935 | TODO_verify_stmts
| TODO_verify_flow
,
939 struct tree_opt_pass pass_pre_expand
=
941 "oplower", /* name */
943 tree_lower_operations
, /* execute */
946 0, /* static_pass_number */
948 PROP_cfg
, /* properties_required */
949 0, /* properties_provided */
950 0, /* properties_destroyed */
951 0, /* todo_flags_start */
952 TODO_dump_func
| TODO_ggc_collect
953 | TODO_verify_stmts
, /* todo_flags_finish */