/* Lower complex number and vector operations to scalar operations.
   Copyright (C) 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "expr.h"
#include "insn-codes.h"
#include "diagnostic.h"
#include "optabs.h"
#include "machmode.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "tree-gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
/* Extract the real or imaginary part of a complex variable or constant.
   Make sure that it's a proper gimple_val and gimplify it if not.
   Emit any new code before BSI.  */

static tree
extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
{
  tree ret, inner_type;

  inner_type = TREE_TYPE (TREE_TYPE (t));
  switch (TREE_CODE (t))
    {
    case COMPLEX_CST:
      ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
      break;

    case COMPLEX_EXPR:
      ret = TREE_OPERAND (t, imagpart_p);
      break;

    case VAR_DECL:
    case PARM_DECL:
      ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
                    inner_type, t);
      break;

    default:
      gcc_unreachable ();
    }

  return gimplify_val (bsi, inner_type, ret);
}
/* Update an assignment to a complex variable in place.  */

static void
update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
{
  tree type;
  tree stmt = bsi_stmt (*bsi);

  if (TREE_CODE (stmt) == RETURN_EXPR)
    stmt = TREE_OPERAND (stmt, 0);

  type = TREE_TYPE (TREE_OPERAND (stmt, 1));
  TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
  modify_stmt (stmt);
}
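
/* For example (illustrative GIMPLE, not taken from an actual dump):
   a complex assignment "a = b_1 / c_2" is rewritten in place as
   "a = COMPLEX_EXPR <rr_3, ri_4>" once the scalar real part rr_3 and
   imaginary part ri_4 have been computed before the statement.  */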
/* Expand complex addition to scalars:
	a + b = (ar + br) + i(ai + bi)
	a - b = (ar - br) + i(ai - bi)  */
static void
expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
                         tree ar, tree ai, tree br, tree bi,
                         enum tree_code code)
{
  tree rr, ri;

  rr = gimplify_build2 (bsi, code, inner_type, ar, br);
  ri = gimplify_build2 (bsi, code, inner_type, ai, bi);

  update_complex_assignment (bsi, rr, ri);
}
/* Expand complex multiplication to scalars:
	a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)  */
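/* For instance, with a = 1 + 2i and b = 3 + 4i:
     rr = 1*3 - 2*4 = -5
     ri = 1*4 + 3*2 = 10
   so a * b = -5 + 10i.  */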
static void
expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
                               tree ar, tree ai, tree br, tree bi)
{
  tree t1, t2, t3, t4, rr, ri;

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
  t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);

  /* Avoid expanding redundant multiplication for the common
     case of squaring a complex number.  */
  if (ar == br && ai == bi)
    t4 = t3;
  else
    t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);

  rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
  ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);

  update_complex_assignment (bsi, rr, ri);
}
/* Expand complex division to scalars, straightforward algorithm.
	a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
	    t = br*br + bi*bi  */
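/* For instance, with a = 3 + 4i and b = 1 + 2i:
     t  = 1*1 + 2*2 = 5
     rr = (3*1 + 4*2) / 5 = 2.2
     ri = (4*1 - 3*2) / 5 = -0.4
   so a / b = 2.2 - 0.4i.  */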
static void
expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
                             tree ar, tree ai, tree br, tree bi,
                             enum tree_code code)
{
  tree rr, ri, div, t1, t2, t3;

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
  div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
  t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
  rr = gimplify_build2 (bsi, code, inner_type, t3, div);

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
  t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
  ri = gimplify_build2 (bsi, code, inner_type, t3, div);

  update_complex_assignment (bsi, rr, ri);
}
/* Expand complex division to scalars, modified algorithm to minimize
   overflow with wide input ranges.  */
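/* This is the scaled division scheme commonly attributed to Smith
   (CACM, 1962): dividing through by the larger of |br| and |bi| keeps
   the intermediate ratio at most 1 in magnitude, so the divisor cannot
   overflow even when br and bi are near the maximum of the type, at
   the cost of a runtime comparison and branch.  */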
static void
expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
                         tree ar, tree ai, tree br, tree bi,
                         enum tree_code code)
{
  tree rr, ri, ratio, div, t1, t2, tr, ti, cond;
  basic_block bb_cond, bb_true, bb_false, bb_join;

  /* Examine |br| < |bi|, and branch.  */
  t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
  t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
  cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));

  bb_cond = bb_true = bb_false = bb_join = NULL;
  rr = ri = tr = ti = NULL;
  if (!TREE_CONSTANT (cond))
    {
      edge e;

      cond = build (COND_EXPR, void_type_node, cond, NULL, NULL);
      bsi_insert_before (bsi, cond, BSI_SAME_STMT);

      /* Split the original block, and create the TRUE and FALSE blocks.  */
      e = split_block (bsi->bb, cond);
      bb_cond = e->src;
      bb_join = e->dest;
      bb_true = create_empty_bb (bb_cond);
      bb_false = create_empty_bb (bb_true);

      t1 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_true));
      t2 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_false));
      COND_EXPR_THEN (cond) = t1;
      COND_EXPR_ELSE (cond) = t2;

      /* Wire the blocks together.  */
      e->flags = EDGE_TRUE_VALUE;
      redirect_edge_succ (e, bb_true);
      make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
      make_edge (bb_true, bb_join, EDGE_FALLTHRU);
      make_edge (bb_false, bb_join, EDGE_FALLTHRU);

      /* Update dominance info.  Note that bb_join's data was
         updated by split_block.  */
      if (dom_info_available_p (CDI_DOMINATORS))
        {
          set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
          set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
        }

      rr = make_rename_temp (inner_type, NULL);
      ri = make_rename_temp (inner_type, NULL);
    }

  /* In the TRUE branch, we compute
      ratio = br/bi;
      div = (br * ratio) + bi;
      tr = (ar * ratio) + ai;
      ti = (ai * ratio) - ar;
      tr = tr / div;
      ti = ti / div;  */
  if (bb_true || integer_nonzerop (cond))
    {
      if (bb_true)
        {
          *bsi = bsi_last (bb_true);
          bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
        }

      ratio = gimplify_build2 (bsi, code, inner_type, br, bi);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, ratio);
      div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, bi);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
      tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ai);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
      ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, ar);

      tr = gimplify_build2 (bsi, code, inner_type, tr, div);
      ti = gimplify_build2 (bsi, code, inner_type, ti, div);

      if (bb_true)
        {
          t1 = build (MODIFY_EXPR, inner_type, rr, tr);
          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
          t1 = build (MODIFY_EXPR, inner_type, ri, ti);
          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
          bsi_remove (bsi);
        }
    }

  /* In the FALSE branch, we compute
      ratio = bi/br;
      div = (bi * ratio) + br;
      tr = (ai * ratio) + ar;
      ti = ai - (ar * ratio);
      tr = tr / div;
      ti = ti / div;  */
  if (bb_false || integer_zerop (cond))
    {
      if (bb_false)
        {
          *bsi = bsi_last (bb_false);
          bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
        }

      ratio = gimplify_build2 (bsi, code, inner_type, bi, br);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, ratio);
      div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, br);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
      tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ar);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
      ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);

      tr = gimplify_build2 (bsi, code, inner_type, tr, div);
      ti = gimplify_build2 (bsi, code, inner_type, ti, div);

      if (bb_false)
        {
          t1 = build (MODIFY_EXPR, inner_type, rr, tr);
          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
          t1 = build (MODIFY_EXPR, inner_type, ri, ti);
          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
          bsi_remove (bsi);
        }
    }

  if (bb_join)
    *bsi = bsi_start (bb_join);
  else
    rr = tr, ri = ti;

  update_complex_assignment (bsi, rr, ri);
}
/* Expand complex division to scalars.  */

static void
expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
                         tree ar, tree ai, tree br, tree bi,
                         enum tree_code code)
{
  switch (flag_complex_divide_method)
    {
    case 0:
      /* straightforward implementation of complex divide acceptable.  */
      expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case 1:
      /* wide ranges of inputs must work for complex divide.  */
      expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
      break;

    default:
      /* C99-like requirements for complex divide (not yet implemented).  */
      gcc_unreachable ();
    }
}
/* Expand complex negation to scalars:
	-a = (-ar) + i(-ai)  */

static void
expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
                         tree ar, tree ai)
{
  tree rr, ri;

  rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
  ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);

  update_complex_assignment (bsi, rr, ri);
}
/* Expand complex conjugate to scalars:
	~a = (ar) + i(-ai)  */

static void
expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
                          tree ar, tree ai)
{
  tree ri;

  ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);

  update_complex_assignment (bsi, ar, ri);
}
/* Expand complex comparison (EQ or NE only).  */

static void
expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
                           tree br, tree bi, enum tree_code code)
{
  tree cr, ci, cc, stmt, expr, type;

  cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
  ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
  cc = gimplify_build2 (bsi,
                        (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
                        boolean_type_node, cr, ci);

  stmt = expr = bsi_stmt (*bsi);

  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      expr = TREE_OPERAND (stmt, 0);
      /* FALLTHRU */
    case MODIFY_EXPR:
      type = TREE_TYPE (TREE_OPERAND (expr, 1));
      TREE_OPERAND (expr, 1) = fold_convert (type, cc);
      break;

    case COND_EXPR:
      TREE_OPERAND (stmt, 0) = cc;
      break;

    default:
      gcc_unreachable ();
    }

  modify_stmt (stmt);
}
/* Process one statement.  If we identify a complex operation, expand it.  */

static void
expand_complex_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree rhs, type, inner_type;
  tree ac, ar, ai, bc, br, bi;
  enum tree_code code;

  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt)
        return;
      if (TREE_CODE (stmt) != MODIFY_EXPR)
        return;
      /* FALLTHRU */

    case MODIFY_EXPR:
      rhs = TREE_OPERAND (stmt, 1);
      break;

    case COND_EXPR:
      rhs = TREE_OPERAND (stmt, 0);
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  code = TREE_CODE (rhs);

  /* Initial filter for operations we handle.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
    case NEGATE_EXPR:
    case CONJ_EXPR:
      if (TREE_CODE (type) != COMPLEX_TYPE)
        return;
      inner_type = TREE_TYPE (type);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
      if (TREE_CODE (inner_type) != COMPLEX_TYPE)
        return;
      break;

    default:
      return;
    }

  /* Extract the components of the two complex values.  Make sure to
     handle the common case of the same value used twice specially.  */
  ac = TREE_OPERAND (rhs, 0);
  ar = extract_component (bsi, ac, 0);
  ai = extract_component (bsi, ac, 1);

  if (TREE_CODE_CLASS (code) == tcc_unary)
    bc = br = bi = NULL;
  else
    {
      bc = TREE_OPERAND (rhs, 1);
      if (ac == bc)
        br = ar, bi = ai;
      else
        {
          br = extract_component (bsi, bc, 0);
          bi = extract_component (bsi, bc, 1);
        }
    }

  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case MULT_EXPR:
      expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
      break;

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
      expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case NEGATE_EXPR:
      expand_complex_negation (bsi, inner_type, ar, ai);
      break;

    case CONJ_EXPR:
      expand_complex_conjugate (bsi, inner_type, ar, ai);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      expand_complex_comparison (bsi, ar, ai, br, bi, code);
      break;

    default:
      gcc_unreachable ();
    }
}
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
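/* For example, with a 64-bit HOST_WIDE_INT, an 8-bit INNER_TYPE and
   VALUE 0x7f: mask = 0xff, ~0 / mask = 0x0101010101010101, and
   multiplying by (value & mask) replicates the byte into
   low = 0x7f7f7f7f7f7f7f7f.  */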
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */

static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}
typedef tree (*elem_op_func) (block_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (block_stmt_iterator *bsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
}
static tree
do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code)
{
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (bsi, code, inner_type, a);
}
static tree
do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code)
{
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (bsi, code, inner_type, a, b);
}
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ b) & 0x80808080)

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ ~b) & 0x80808080)

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
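/* Worked example for one 8-bit item within a word: with a = 0x85 and
   b = 0x03, the 0x7f masks clear the per-item sign bits so carries
   cannot cross item boundaries, giving 0x05 + 0x03 = 0x08; XORing back
   the sign bits, (0x85 ^ 0x03) & 0x80 = 0x80, yields 0x88, which is
   exactly 0x85 + 0x03 modulo 256.  */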
static tree
do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
static tree
do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */

static tree
expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  tree head, *chain = &head;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;

  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
    {
      tree result = f (bsi, inner_type, a, b, index, part_width, code);
      *chain = tree_cons (NULL_TREE, result, NULL_TREE);
      chain = &TREE_CHAIN (*chain);
    }

  return build1 (CONSTRUCTOR, type, head);
}
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
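/* For example (assuming a 32-bit word): a V8QI operation is done two
   words at a time, whereas a V2QI operation fits in a single HImode
   scalar operation.  */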
static tree
expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
                        tree a, tree b,
                        enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (bsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (bsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = gimplify_val (bsi, word_type, result);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
    }

  return build1 (VIEW_CONVERT_EXPR, type, result);
}
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
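/* For example, a V16QI addition with 8-byte words has parts_per_word
   = 8 and 16 subparts, so both thresholds are met and the
   word-at-a-time F_PARALLEL path is taken.  */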
static tree
expand_vector_addition (block_stmt_iterator *bsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (bsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (bsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}
/* Return a type for the widest vector mode whose components are of mode
   INNER_MODE, or NULL_TREE if none is found.  */
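/* For instance, on a hypothetical target whose optab handles V4SFmode
   but nothing wider, INNER_MODE == SFmode would yield the V4SF type;
   the result depends entirely on what the optab supports.  */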
static tree
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
{
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
    mode = MIN_MODE_VECTOR_FLOAT;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && op->handlers[mode].insn_code != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return lang_hooks.types.type_for_mode (best_mode, 1);
}
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree *p_rhs, rhs, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op;

  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
        return;
      /* FALLTHRU */

    case MODIFY_EXPR:
      p_rhs = &TREE_OPERAND (stmt, 1);
      rhs = *p_rhs;
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  code = TREE_CODE (rhs);
  if (TREE_CODE_CLASS (code) != tcc_unary
      && TREE_CODE_CLASS (code) != tcc_binary)
    return;

  if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);
  op = optab_for_tree_code (code, type);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (TYPE_MODE (type) == BLKmode && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
      if (vector_compute_type != NULL_TREE)
        compute_type = vector_compute_type;
    }

  compute_mode = TYPE_MODE (compute_type);

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
          && op != NULL
          && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
        return;
      else
        {
          /* There is no operation in hardware, so fall back to scalars.  */
          compute_type = TREE_TYPE (type);
          compute_mode = TYPE_MODE (compute_type);
        }
    }

  /* If the compute mode is not a vector mode (hence we are decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may
     want to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (TYPE_TRAP_SIGNED (type))
          break;

        *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
                                         TREE_OPERAND (rhs, 0),
                                         TREE_OPERAND (rhs, 1), code);
        modify_stmt (bsi_stmt (*bsi));
        return;

      case NEGATE_EXPR:
        if (TYPE_TRAP_SIGNED (type))
          break;

        *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
                                         TREE_OPERAND (rhs, 0),
                                         NULL_TREE, code);
        modify_stmt (bsi_stmt (*bsi));
        return;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        *p_rhs = expand_vector_parallel (bsi, do_binop, type,
                                         TREE_OPERAND (rhs, 0),
                                         TREE_OPERAND (rhs, 1), code);
        modify_stmt (bsi_stmt (*bsi));
        return;

      case BIT_NOT_EXPR:
        *p_rhs = expand_vector_parallel (bsi, do_unop, type,
                                         TREE_OPERAND (rhs, 0),
                                         NULL_TREE, code);
        modify_stmt (bsi_stmt (*bsi));
        return;

      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
                                      TREE_OPERAND (rhs, 0),
                                      NULL_TREE, code);
  else
    *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
                                      TREE_OPERAND (rhs, 0),
                                      TREE_OPERAND (rhs, 1), code);

  modify_stmt (bsi_stmt (*bsi));
}
/* Expand all vector operations in the current function.  */

static void
expand_vector_operations (void)
{
  block_stmt_iterator bsi;
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        expand_vector_operations_1 (&bsi);
    }
}
/* Lower all complex and vector operations in the current function.  */

static void
tree_lower_operations (void)
{
  int old_last_basic_block = last_basic_block;
  block_stmt_iterator bsi;
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      /* Skip blocks created during lowering; they contain only
         already-lowered scalar code.  */
      if (bb->index >= old_last_basic_block)
        continue;
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        {
          expand_complex_operations_1 (&bsi);
          expand_vector_operations_1 (&bsi);
        }
    }
}
struct tree_opt_pass pass_lower_vector_ssa = 
{
  "veclower",				/* name */
  NULL,					/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_rename_vars	/* todo_flags_finish */
    | TODO_ggc_collect | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow,
  0					/* letter */
};

struct tree_opt_pass pass_pre_expand = 
{
  "oplower",				/* name */
  NULL,					/* gate */
  tree_lower_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect
    | TODO_verify_stmts,		/* todo_flags_finish */
  0					/* letter */
};

#include "gt-tree-complex.h"