1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
41 /* Extract the real or imaginary part of a complex variable or constant.
42 Make sure that it's a proper gimple_val and gimplify it if not.
43 Emit any new code before BSI. */
46 extract_component (block_stmt_iterator
*bsi
, tree t
, bool imagpart_p
)
50 inner_type
= TREE_TYPE (TREE_TYPE (t
));
51 switch (TREE_CODE (t
))
54 ret
= (imagpart_p
? TREE_IMAGPART (t
) : TREE_REALPART (t
));
58 ret
= TREE_OPERAND (t
, imagpart_p
);
63 ret
= build1 ((imagpart_p
? IMAGPART_EXPR
: REALPART_EXPR
),
71 return gimplify_val (bsi
, inner_type
, ret
);
74 /* Update an assignment to a complex variable in place. */
77 update_complex_assignment (block_stmt_iterator
*bsi
, tree r
, tree i
)
79 tree stmt
= bsi_stmt (*bsi
);
82 if (TREE_CODE (stmt
) == RETURN_EXPR
)
83 stmt
= TREE_OPERAND (stmt
, 0);
85 type
= TREE_TYPE (TREE_OPERAND (stmt
, 1));
86 TREE_OPERAND (stmt
, 1) = build (COMPLEX_EXPR
, type
, r
, i
);
90 /* Expand complex addition to scalars:
91 a + b = (ar + br) + i(ai + bi)
92 a - b = (ar - br) + i(ai + bi)
96 expand_complex_addition (block_stmt_iterator
*bsi
, tree inner_type
,
97 tree ar
, tree ai
, tree br
, tree bi
,
102 rr
= gimplify_build2 (bsi
, code
, inner_type
, ar
, br
);
103 ri
= gimplify_build2 (bsi
, code
, inner_type
, ai
, bi
);
105 update_complex_assignment (bsi
, rr
, ri
);
108 /* Expand complex multiplication to scalars:
109 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
113 expand_complex_multiplication (block_stmt_iterator
*bsi
, tree inner_type
,
114 tree ar
, tree ai
, tree br
, tree bi
)
116 tree t1
, t2
, t3
, t4
, rr
, ri
;
118 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
119 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
120 t3
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
122 /* Avoid expanding redundant multiplication for the common
123 case of squaring a complex number. */
124 if (ar
== br
&& ai
== bi
)
127 t4
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
129 rr
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
130 ri
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t3
, t4
);
132 update_complex_assignment (bsi
, rr
, ri
);
135 /* Expand complex division to scalars, straightforward algorithm.
136 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
141 expand_complex_div_straight (block_stmt_iterator
*bsi
, tree inner_type
,
142 tree ar
, tree ai
, tree br
, tree bi
,
145 tree rr
, ri
, div
, t1
, t2
, t3
;
147 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, br
, br
);
148 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, bi
, bi
);
149 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
151 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
152 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
153 t3
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
154 rr
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
156 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
157 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
158 t3
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
159 ri
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
161 update_complex_assignment (bsi
, rr
, ri
);
164 /* Expand complex division to scalars, modified algorithm to minimize
165 overflow with wide input ranges. */
168 expand_complex_div_wide (block_stmt_iterator
*bsi
, tree inner_type
,
169 tree ar
, tree ai
, tree br
, tree bi
,
172 tree rr
, ri
, ratio
, div
, t1
, t2
, min
, max
, cond
;
174 /* Examine |br| < |bi|, and branch. */
175 t1
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, br
);
176 t2
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, bi
);
177 cond
= fold (build (LT_EXPR
, boolean_type_node
, t1
, t2
));
180 if (TREE_CONSTANT (cond
))
182 if (integer_zerop (cond
))
189 basic_block bb_cond
, bb_true
, bb_false
, bb_join
;
193 l1
= create_artificial_label ();
194 t1
= build (GOTO_EXPR
, void_type_node
, l1
);
195 l2
= create_artificial_label ();
196 t2
= build (GOTO_EXPR
, void_type_node
, l2
);
197 cond
= build (COND_EXPR
, void_type_node
, cond
, t1
, t2
);
198 bsi_insert_before (bsi
, cond
, BSI_SAME_STMT
);
200 min
= make_rename_temp (inner_type
, NULL
);
201 max
= make_rename_temp (inner_type
, NULL
);
202 l3
= create_artificial_label ();
204 /* Split the original block, and create the TRUE and FALSE blocks. */
205 e
= split_block (bsi
->bb
, cond
);
208 bb_true
= create_empty_bb (bb_cond
);
209 bb_false
= create_empty_bb (bb_true
);
211 /* Wire the blocks together. */
212 e
->flags
= EDGE_TRUE_VALUE
;
213 redirect_edge_succ (e
, bb_true
);
214 make_edge (bb_cond
, bb_false
, EDGE_FALSE_VALUE
);
215 make_edge (bb_true
, bb_join
, 0);
216 make_edge (bb_false
, bb_join
, 0);
218 /* Update dominance info. Note that bb_join's data was
219 updated by split_block. */
220 if (dom_info_available_p (CDI_DOMINATORS
))
222 set_immediate_dominator (CDI_DOMINATORS
, bb_true
, bb_cond
);
223 set_immediate_dominator (CDI_DOMINATORS
, bb_false
, bb_cond
);
226 /* Compute min and max for TRUE block. */
227 *bsi
= bsi_start (bb_true
);
228 t1
= build (LABEL_EXPR
, void_type_node
, l1
);
229 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
230 t1
= build (MODIFY_EXPR
, inner_type
, min
, br
);
231 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
232 t1
= build (MODIFY_EXPR
, inner_type
, max
, bi
);
233 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
235 /* Compute min and max for FALSE block. */
236 *bsi
= bsi_start (bb_false
);
237 t1
= build (LABEL_EXPR
, void_type_node
, l2
);
238 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
239 t1
= build (MODIFY_EXPR
, inner_type
, min
, bi
);
240 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
241 t1
= build (MODIFY_EXPR
, inner_type
, max
, br
);
242 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
244 /* Insert the join label into the tail of the original block. */
245 *bsi
= bsi_start (bb_join
);
246 t1
= build (LABEL_EXPR
, void_type_node
, l3
);
247 bsi_insert_before (bsi
, t1
, BSI_SAME_STMT
);
250 /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|). We now use the
251 ratio min/max to scale both the dividend and divisor. */
252 ratio
= gimplify_build2 (bsi
, code
, inner_type
, min
, max
);
254 /* Calculate the divisor: min*ratio + max. */
255 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, min
, ratio
);
256 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, max
);
258 /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div). */
259 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, ratio
);
260 t2
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, ar
, t1
);
261 rr
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
263 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, ratio
);
264 t2
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, ai
, t1
);
265 ri
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
267 update_complex_assignment (bsi
, rr
, ri
);
270 /* Expand complex division to scalars. */
273 expand_complex_division (block_stmt_iterator
*bsi
, tree inner_type
,
274 tree ar
, tree ai
, tree br
, tree bi
,
277 switch (flag_complex_divide_method
)
280 /* straightforward implementation of complex divide acceptable. */
281 expand_complex_div_straight (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
284 /* wide ranges of inputs must work for complex divide. */
285 expand_complex_div_wide (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
288 /* C99-like requirements for complex divide (not yet implemented). */
293 /* Expand complex negation to scalars:
298 expand_complex_negation (block_stmt_iterator
*bsi
, tree inner_type
,
303 rr
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ar
);
304 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
306 update_complex_assignment (bsi
, rr
, ri
);
309 /* Expand complex conjugate to scalars:
314 expand_complex_conjugate (block_stmt_iterator
*bsi
, tree inner_type
,
319 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
321 update_complex_assignment (bsi
, ar
, ri
);
324 /* Expand complex comparison (EQ or NE only). */
327 expand_complex_comparison (block_stmt_iterator
*bsi
, tree ar
, tree ai
,
328 tree br
, tree bi
, enum tree_code code
)
330 tree cr
, ci
, cc
, stmt
, expr
, type
;
332 cr
= gimplify_build2 (bsi
, code
, boolean_type_node
, ar
, br
);
333 ci
= gimplify_build2 (bsi
, code
, boolean_type_node
, ai
, bi
);
334 cc
= gimplify_build2 (bsi
,
335 (code
== EQ_EXPR
? TRUTH_AND_EXPR
: TRUTH_OR_EXPR
),
336 boolean_type_node
, cr
, ci
);
338 stmt
= expr
= bsi_stmt (*bsi
);
340 switch (TREE_CODE (stmt
))
343 expr
= TREE_OPERAND (stmt
, 0);
346 type
= TREE_TYPE (TREE_OPERAND (expr
, 1));
347 TREE_OPERAND (expr
, 1) = fold_convert (type
, cc
);
350 TREE_OPERAND (stmt
, 0) = cc
;
359 /* Process one statement. If we identify a complex operation, expand it. */
362 expand_complex_operations_1 (block_stmt_iterator
*bsi
)
364 tree stmt
= bsi_stmt (*bsi
);
365 tree rhs
, type
, inner_type
;
366 tree ac
, ar
, ai
, bc
, br
, bi
;
369 switch (TREE_CODE (stmt
))
372 stmt
= TREE_OPERAND (stmt
, 0);
375 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
380 rhs
= TREE_OPERAND (stmt
, 1);
384 rhs
= TREE_OPERAND (stmt
, 0);
391 type
= TREE_TYPE (rhs
);
392 code
= TREE_CODE (rhs
);
394 /* Initial filter for operations we handle. */
407 if (TREE_CODE (type
) != COMPLEX_TYPE
)
409 inner_type
= TREE_TYPE (type
);
414 inner_type
= TREE_TYPE (TREE_OPERAND (rhs
, 1));
415 if (TREE_CODE (inner_type
) != COMPLEX_TYPE
)
423 /* Extract the components of the two complex values. Make sure and
424 handle the common case of the same value used twice specially. */
425 ac
= TREE_OPERAND (rhs
, 0);
426 ar
= extract_component (bsi
, ac
, 0);
427 ai
= extract_component (bsi
, ac
, 1);
429 if (TREE_CODE_CLASS (code
) == tcc_unary
)
433 bc
= TREE_OPERAND (rhs
, 1);
438 br
= extract_component (bsi
, bc
, 0);
439 bi
= extract_component (bsi
, bc
, 1);
447 expand_complex_addition (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
451 expand_complex_multiplication (bsi
, inner_type
, ar
, ai
, br
, bi
);
459 expand_complex_division (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
463 expand_complex_negation (bsi
, inner_type
, ar
, ai
);
467 expand_complex_conjugate (bsi
, inner_type
, ar
, ai
);
472 expand_complex_comparison (bsi
, ar
, ai
, br
, bi
, code
);
480 /* Build a constant of type TYPE, made of VALUE's bits replicated
481 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
483 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
485 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
486 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
487 unsigned HOST_WIDE_INT low
, high
, mask
;
492 if (width
== HOST_BITS_PER_WIDE_INT
)
496 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
497 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
500 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
501 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
502 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
504 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
509 ret
= build_int_cst_wide (type
, low
, high
);
513 static GTY(()) tree vector_inner_type
;
514 static GTY(()) tree vector_last_type
;
515 static GTY(()) int vector_last_nunits
;
517 /* Return a suitable vector types made of SUBPARTS units each of mode
518 "word_mode" (the global variable). */
520 build_word_mode_vector_type (int nunits
)
522 if (!vector_inner_type
)
523 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
524 else if (vector_last_nunits
== nunits
)
526 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
527 return vector_last_type
;
530 /* We build a new type, but we canonicalize it nevertheless,
531 because it still saves some memory. */
532 vector_last_nunits
= nunits
;
533 vector_last_type
= type_hash_canon (nunits
,
534 build_vector_type (vector_inner_type
,
536 return vector_last_type
;
539 typedef tree (*elem_op_func
) (block_stmt_iterator
*,
540 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
543 tree_vec_extract (block_stmt_iterator
*bsi
, tree type
,
544 tree t
, tree bitsize
, tree bitpos
)
547 return gimplify_build3 (bsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
549 return gimplify_build1 (bsi
, VIEW_CONVERT_EXPR
, type
, t
);
553 do_unop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
,
554 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
557 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
558 return gimplify_build1 (bsi
, code
, inner_type
, a
);
562 do_binop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
, tree b
,
563 tree bitpos
, tree bitsize
, enum tree_code code
)
565 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
566 b
= tree_vec_extract (bsi
, inner_type
, b
, bitsize
, bitpos
);
567 return gimplify_build2 (bsi
, code
, inner_type
, a
, b
);
570 /* Expand vector addition to scalars. This does bit twiddling
571 in order to increase parallelism:
573 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
576 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
577 (a ^ ~b) & 0x80808080
579 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
581 This optimization should be done only if 4 vector items or more
584 do_plus_minus (block_stmt_iterator
*bsi
, tree word_type
, tree a
, tree b
,
585 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
588 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
589 unsigned HOST_WIDE_INT max
;
590 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
592 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
593 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
594 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
596 a
= tree_vec_extract (bsi
, word_type
, a
, bitsize
, bitpos
);
597 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
599 signs
= gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
600 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
601 if (code
== PLUS_EXPR
)
602 a_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
605 a_low
= gimplify_build2 (bsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
606 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, signs
);
609 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
610 result_low
= gimplify_build2 (bsi
, code
, word_type
, a_low
, b_low
);
611 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
615 do_negate (block_stmt_iterator
*bsi
, tree word_type
, tree b
,
616 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
617 tree bitsize ATTRIBUTE_UNUSED
,
618 enum tree_code code ATTRIBUTE_UNUSED
)
620 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
622 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
624 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
625 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
626 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
628 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
630 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
631 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, b
);
632 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
633 result_low
= gimplify_build2 (bsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
634 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
637 /* Expand a vector operation to scalars, by using many operations
638 whose type is the vector type's inner type. */
640 expand_vector_piecewise (block_stmt_iterator
*bsi
, elem_op_func f
,
641 tree type
, tree inner_type
,
642 tree a
, tree b
, enum tree_code code
)
644 tree head
, *chain
= &head
;
645 tree part_width
= TYPE_SIZE (inner_type
);
646 tree index
= bitsize_int (0);
647 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
648 int delta
= tree_low_cst (part_width
, 1)
649 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
652 for (i
= 0; i
< nunits
;
653 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
, 0))
655 tree result
= f (bsi
, inner_type
, a
, b
, index
, part_width
, code
);
656 *chain
= tree_cons (NULL_TREE
, result
, NULL_TREE
);
657 chain
= &TREE_CHAIN (*chain
);
660 return build1 (CONSTRUCTOR
, type
, head
);
663 /* Expand a vector operation to scalars with the freedom to use
664 a scalar integer type, or to use a different size for the items
665 in the vector type. */
667 expand_vector_parallel (block_stmt_iterator
*bsi
, elem_op_func f
, tree type
,
671 tree result
, compute_type
;
672 enum machine_mode mode
;
673 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
675 /* We have three strategies. If the type is already correct, just do
676 the operation an element at a time. Else, if the vector is wider than
677 one word, do it a word at a time; finally, if the vector is smaller
678 than one word, do it as a scalar. */
679 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
680 return expand_vector_piecewise (bsi
, f
,
681 type
, TREE_TYPE (type
),
683 else if (n_words
> 1)
685 tree word_type
= build_word_mode_vector_type (n_words
);
686 result
= expand_vector_piecewise (bsi
, f
,
687 word_type
, TREE_TYPE (word_type
),
689 result
= gimplify_val (bsi
, word_type
, result
);
693 /* Use a single scalar operation with a mode no wider than word_mode. */
694 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
695 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
696 result
= f (bsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
699 return build1 (VIEW_CONVERT_EXPR
, type
, result
);
702 /* Expand a vector operation to scalars; for integer types we can use
703 special bit twiddling tricks to do the sums a word at a time, using
704 function F_PARALLEL instead of F. These tricks are done only if
705 they can process at least four items, that is, only if the vector
706 holds at least four items and if a word can hold four items. */
708 expand_vector_addition (block_stmt_iterator
*bsi
,
709 elem_op_func f
, elem_op_func f_parallel
,
710 tree type
, tree a
, tree b
, enum tree_code code
)
712 int parts_per_word
= UNITS_PER_WORD
713 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
715 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
716 && parts_per_word
>= 4
717 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
718 return expand_vector_parallel (bsi
, f_parallel
,
721 return expand_vector_piecewise (bsi
, f
,
722 type
, TREE_TYPE (type
),
726 /* Return a type for the widest vector mode whose components are of mode
727 INNER_MODE, or NULL_TREE if none is found. */
729 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
)
731 enum machine_mode best_mode
= VOIDmode
, mode
;
734 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
735 mode
= MIN_MODE_VECTOR_FLOAT
;
737 mode
= MIN_MODE_VECTOR_INT
;
739 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
740 if (GET_MODE_INNER (mode
) == inner_mode
741 && GET_MODE_NUNITS (mode
) > best_nunits
742 && op
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
743 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
745 if (best_mode
== VOIDmode
)
748 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
751 /* Process one statement. If we identify a vector operation, expand it. */
754 expand_vector_operations_1 (block_stmt_iterator
*bsi
)
756 tree stmt
= bsi_stmt (*bsi
);
757 tree
*p_rhs
, rhs
, type
, compute_type
;
759 enum machine_mode compute_mode
;
762 switch (TREE_CODE (stmt
))
765 stmt
= TREE_OPERAND (stmt
, 0);
766 if (!stmt
|| TREE_CODE (stmt
) != MODIFY_EXPR
)
772 p_rhs
= &TREE_OPERAND (stmt
, 1);
780 type
= TREE_TYPE (rhs
);
781 if (TREE_CODE (type
) != VECTOR_TYPE
)
784 code
= TREE_CODE (rhs
);
785 if (TREE_CODE_CLASS (code
) != tcc_unary
786 && TREE_CODE_CLASS (code
) != tcc_binary
)
789 if (code
== NOP_EXPR
|| code
== VIEW_CONVERT_EXPR
)
792 gcc_assert (code
!= CONVERT_EXPR
);
793 op
= optab_for_tree_code (code
, type
);
795 /* Optabs will try converting a negation into a subtraction, so
796 look for it as well. TODO: negation of floating-point vectors
797 might be turned into an exclusive OR toggling the sign bit. */
799 && code
== NEGATE_EXPR
800 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
801 op
= optab_for_tree_code (MINUS_EXPR
, type
);
803 /* For very wide vectors, try using a smaller vector mode. */
805 if (TYPE_MODE (type
) == BLKmode
&& op
)
807 tree vector_compute_type
808 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
);
809 if (vector_compute_type
!= NULL_TREE
)
810 compute_type
= vector_compute_type
;
813 compute_mode
= TYPE_MODE (compute_type
);
815 /* If we are breaking a BLKmode vector into smaller pieces,
816 type_for_widest_vector_mode has already looked into the optab,
817 so skip these checks. */
818 if (compute_type
== type
)
820 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
821 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
)
823 && op
->handlers
[compute_mode
].insn_code
!= CODE_FOR_nothing
)
827 /* There is no operation in hardware, so fall back to scalars. */
828 compute_type
= TREE_TYPE (type
);
829 compute_mode
= TYPE_MODE (compute_type
);
833 /* If the compute mode is not a vector mode (hence we are decomposing
834 a BLKmode vector to smaller, hardware-supported vectors), we may
835 want to expand the operations in parallel. */
836 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
837 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
)
842 if (TYPE_TRAP_SIGNED (type
))
845 *p_rhs
= expand_vector_addition (bsi
, do_binop
, do_plus_minus
, type
,
846 TREE_OPERAND (rhs
, 0),
847 TREE_OPERAND (rhs
, 1), code
);
848 modify_stmt (bsi_stmt (*bsi
));
852 if (TYPE_TRAP_SIGNED (type
))
855 *p_rhs
= expand_vector_addition (bsi
, do_unop
, do_negate
, type
,
856 TREE_OPERAND (rhs
, 0),
858 modify_stmt (bsi_stmt (*bsi
));
864 *p_rhs
= expand_vector_parallel (bsi
, do_binop
, type
,
865 TREE_OPERAND (rhs
, 0),
866 TREE_OPERAND (rhs
, 1), code
);
867 modify_stmt (bsi_stmt (*bsi
));
871 *p_rhs
= expand_vector_parallel (bsi
, do_unop
, type
,
872 TREE_OPERAND (rhs
, 0),
874 modify_stmt (bsi_stmt (*bsi
));
881 if (TREE_CODE_CLASS (code
) == tcc_unary
)
882 *p_rhs
= expand_vector_piecewise (bsi
, do_unop
, type
, compute_type
,
883 TREE_OPERAND (rhs
, 0),
886 *p_rhs
= expand_vector_piecewise (bsi
, do_binop
, type
, compute_type
,
887 TREE_OPERAND (rhs
, 0),
888 TREE_OPERAND (rhs
, 1), code
);
890 modify_stmt (bsi_stmt (*bsi
));
894 expand_vector_operations (void)
896 block_stmt_iterator bsi
;
901 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
902 expand_vector_operations_1 (&bsi
);
907 tree_lower_operations (void)
909 int old_last_basic_block
= last_basic_block
;
910 block_stmt_iterator bsi
;
915 if (bb
->index
>= old_last_basic_block
)
917 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
919 expand_complex_operations_1 (&bsi
);
920 expand_vector_operations_1 (&bsi
);
926 struct tree_opt_pass pass_lower_vector_ssa
=
930 expand_vector_operations
, /* execute */
933 0, /* static_pass_number */
935 PROP_cfg
, /* properties_required */
936 0, /* properties_provided */
937 0, /* properties_destroyed */
938 0, /* todo_flags_start */
939 TODO_dump_func
| TODO_rename_vars
/* todo_flags_finish */
940 | TODO_ggc_collect
| TODO_verify_ssa
941 | TODO_verify_stmts
| TODO_verify_flow
,
945 struct tree_opt_pass pass_pre_expand
=
947 "oplower", /* name */
949 tree_lower_operations
, /* execute */
952 0, /* static_pass_number */
954 PROP_cfg
, /* properties_required */
955 0, /* properties_provided */
956 0, /* properties_destroyed */
957 0, /* todo_flags_start */
958 TODO_dump_func
| TODO_ggc_collect
959 | TODO_verify_stmts
, /* todo_flags_finish */
963 #include "gt-tree-complex.h"