/* Lower complex number and vector operations to scalar operations.
   Copyright (C) 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
38 #include "ggc.h"

/* Extract the real or imaginary part of a complex variable or constant.
   Make sure that it's a proper gimple_val and gimplify it if not.
   Emit any new code before BSI.  */

static tree
extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
{
  tree ret, inner_type;

  inner_type = TREE_TYPE (TREE_TYPE (t));
  switch (TREE_CODE (t))
    {
    case COMPLEX_CST:
      ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
      break;

    case COMPLEX_EXPR:
      ret = TREE_OPERAND (t, imagpart_p);
      break;

    case VAR_DECL:
    case PARM_DECL:
      ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
		    inner_type, t);
      break;

    default:
      gcc_unreachable ();
    }

  return gimplify_val (bsi, inner_type, ret);
}

/* Update an assignment to a complex variable in place.  */

static void
update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
{
  tree stmt = bsi_stmt (*bsi);
  tree type;

  if (TREE_CODE (stmt) == RETURN_EXPR)
    stmt = TREE_OPERAND (stmt, 0);

  type = TREE_TYPE (TREE_OPERAND (stmt, 1));
  TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
  modify_stmt (stmt);
}

/* Expand complex addition to scalars:
	a + b = (ar + br) + i(ai + bi)
	a - b = (ar - br) + i(ai - bi)
*/

static void
expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  tree rr, ri;

  rr = gimplify_build2 (bsi, code, inner_type, ar, br);
  ri = gimplify_build2 (bsi, code, inner_type, ai, bi);

  update_complex_assignment (bsi, rr, ri);
}
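
/* For example, given two _Complex double operands, a statement such as
   "c = a + b" is lowered to roughly the following scalar GIMPLE (the
   temporary names are illustrative only):

	T1 = REALPART_EXPR <a> + REALPART_EXPR <b>;
	T2 = IMAGPART_EXPR <a> + IMAGPART_EXPR <b>;
	c = COMPLEX_EXPR <T1, T2>;  */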

/* Expand complex multiplication to scalars:
	a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
*/

static void
expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
			       tree ar, tree ai, tree br, tree bi)
{
  tree t1, t2, t3, t4, rr, ri;

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
  t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);

  /* Avoid expanding redundant multiplication for the common
     case of squaring a complex number: when a == b, ar*bi and
     ai*br are the same product.  */
  if (ar == br && ai == bi)
    t4 = t3;
  else
    t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);

  rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
  ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);

  update_complex_assignment (bsi, rr, ri);
}
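
/* A concrete check of the formula above:
   (1 + 2i) * (3 + 4i) = (1*3 - 2*4) + i(1*4 + 3*2) = -5 + 10i.  */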

/* Expand complex division to scalars, straightforward algorithm.
	a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
	t = br*br + bi*bi
*/

static void
expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
			     tree ar, tree ai, tree br, tree bi,
			     enum tree_code code)
{
  tree rr, ri, div, t1, t2, t3;

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
  div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
  t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
  rr = gimplify_build2 (bsi, code, inner_type, t3, div);

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
  t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
  t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
  ri = gimplify_build2 (bsi, code, inner_type, t3, div);

  update_complex_assignment (bsi, rr, ri);
}
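
/* The straight method squares both parts of the divisor, so it overflows
   whenever |b| exceeds the square root of the largest representable
   value, even when the quotient itself is representable.  E.g. with IEEE
   doubles and br = bi = 1e200, br*br + bi*bi overflows to +Inf and the
   computed quotient collapses to zero, although for a modest dividend the
   exact result is on the order of 1e-200.  The wide-range variant below
   avoids this.  */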

/* Expand complex division to scalars, modified algorithm to minimize
   overflow with wide input ranges.  This is Smith's algorithm: rather
   than forming br*br + bi*bi directly, scale the computation by the
   ratio of the smaller to the larger magnitude part of the divisor, so
   that no intermediate value squares the larger part.

   With min = MIN (|br|, |bi|), max = MAX (|br|, |bi|), ratio = min/max
   and div = min*ratio + max:

	|br| >= |bi|:  a/b = ((ar + ai*ratio) / div) + i((ai - ar*ratio) / div)
	|br| <  |bi|:  a/b = ((ai + ar*ratio) / div) + i((ai*ratio - ar) / div)

   The numerators differ between the two cases, so alongside MIN and MAX
   we also select operands X, Y, U, V such that both cases read

	a/b = ((x + y*ratio) / div) + i((u - v*ratio) / div).  */

static void
expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  tree rr, ri, ratio, div, t1, t2, min, max, x, y, u, v, cond;

  /* Examine |br| < |bi|, and branch.  */
  t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
  t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
  cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
  STRIP_NOPS (cond);

  if (TREE_CONSTANT (cond))
    {
      if (integer_zerop (cond))
	{
	  /* |br| >= |bi|.  */
	  min = bi, max = br;
	  x = ar, y = ai, u = ai, v = ar;
	}
      else
	{
	  /* |br| < |bi|.  Note u - v*ratio = ai*ratio - ar here.  */
	  min = br, max = bi;
	  x = ai, y = ar;
	  u = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
	  v = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
	}
    }
  else
    {
      basic_block bb_cond, bb_true, bb_false, bb_join;
      tree l1, l2, l3;
      edge e;

      l1 = create_artificial_label ();
      t1 = build (GOTO_EXPR, void_type_node, l1);
      l2 = create_artificial_label ();
      t2 = build (GOTO_EXPR, void_type_node, l2);
      cond = build (COND_EXPR, void_type_node, cond, t1, t2);
      bsi_insert_before (bsi, cond, BSI_SAME_STMT);

      min = make_rename_temp (inner_type, NULL);
      max = make_rename_temp (inner_type, NULL);
      x = make_rename_temp (inner_type, NULL);
      y = make_rename_temp (inner_type, NULL);
      u = make_rename_temp (inner_type, NULL);
      v = make_rename_temp (inner_type, NULL);
      l3 = create_artificial_label ();

      /* Split the original block, and create the TRUE and FALSE blocks.  */
      e = split_block (bsi->bb, cond);
      bb_cond = e->src;
      bb_join = e->dest;
      bb_true = create_empty_bb (bb_cond);
      bb_false = create_empty_bb (bb_true);

      /* Wire the blocks together.  */
      e->flags = EDGE_TRUE_VALUE;
      redirect_edge_succ (e, bb_true);
      make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
      make_edge (bb_true, bb_join, 0);
      make_edge (bb_false, bb_join, 0);

      /* Update dominance info.  Note that bb_join's data was
	 updated by split_block.  */
      if (dom_info_available_p (CDI_DOMINATORS))
	{
	  set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
	  set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
	}

      /* Fill the TRUE block (|br| < |bi|): select min/max and the
	 matching numerator operands.  */
      *bsi = bsi_start (bb_true);
      t1 = build (LABEL_EXPR, void_type_node, l1);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, min, br);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, max, bi);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, x, ai);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, y, ar);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, u,
		  build1 (NEGATE_EXPR, inner_type, ar));
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, v,
		  build1 (NEGATE_EXPR, inner_type, ai));
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);

      /* Likewise for the FALSE block (|br| >= |bi|).  */
      *bsi = bsi_start (bb_false);
      t1 = build (LABEL_EXPR, void_type_node, l2);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, min, bi);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, max, br);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, x, ar);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, y, ai);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, u, ai);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, v, ar);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);

      /* Insert the join label into the tail of the original block.  */
      *bsi = bsi_start (bb_join);
      t1 = build (LABEL_EXPR, void_type_node, l3);
      bsi_insert_before (bsi, t1, BSI_SAME_STMT);
    }

  /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|), plus the numerator
     operands X, Y, U, V matching the branch taken.  Use the ratio
     min/max to scale both the dividend and divisor.  */
  ratio = gimplify_build2 (bsi, code, inner_type, min, max);

  /* Calculate the divisor: min*ratio + max.  */
  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, min, ratio);
  div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, max);

  /* Result is now ((x + y*ratio)/div) + i((u - v*ratio)/div).  */
  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, y, ratio);
  t2 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, x, t1);
  rr = gimplify_build2 (bsi, code, inner_type, t2, div);

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, v, ratio);
  t2 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, u, t1);
  ri = gimplify_build2 (bsi, code, inner_type, t2, div);

  update_complex_assignment (bsi, rr, ri);
}
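
/* Worked example of the scaling: a = 1 + 1i, b = 1e200 + 1e200i in IEEE
   double.  Here |br| >= |bi|, so ratio = 1 and div = 2e200, giving
   rr = (1 + 1*1) / 2e200 = 1e-200 and ri = (1 - 1*1) / 2e200 = 0, the
   exact quotient; the straight method would have overflowed computing
   br*br + bi*bi = 2e400.  */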

/* Expand complex division to scalars.  */

static void
expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  switch (flag_complex_divide_method)
    {
    case 0:
      /* The straightforward implementation of complex divide is
	 acceptable.  */
      expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case 1:
      /* Wide ranges of inputs must work correctly for complex divide.  */
      expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
      break;

    default:
      /* C99-like requirements for complex divide (not yet implemented).  */
      gcc_unreachable ();
    }
}

/* Expand complex negation to scalars:
	-a = (-ar) + i(-ai)
*/

static void
expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai)
{
  tree rr, ri;

  rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
  ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);

  update_complex_assignment (bsi, rr, ri);
}

/* Expand complex conjugate to scalars:
	~a = (ar) + i(-ai)
*/

static void
expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
			  tree ar, tree ai)
{
  tree ri;

  ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);

  update_complex_assignment (bsi, ar, ri);
}

/* Expand complex comparison (EQ or NE only).  */

static void
expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
			   tree br, tree bi, enum tree_code code)
{
  tree cr, ci, cc, stmt, expr, type;

  cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
  ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
  cc = gimplify_build2 (bsi,
			(code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
			boolean_type_node, cr, ci);

  stmt = expr = bsi_stmt (*bsi);

  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      expr = TREE_OPERAND (stmt, 0);
      /* FALLTHRU */
    case MODIFY_EXPR:
      type = TREE_TYPE (TREE_OPERAND (expr, 1));
      TREE_OPERAND (expr, 1) = fold_convert (type, cc);
      break;
    case COND_EXPR:
      TREE_OPERAND (stmt, 0) = cc;
      break;
    default:
      gcc_unreachable ();
    }

  modify_stmt (stmt);
}
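
/* For example, "if (x == y)" with complex operands becomes, in effect,
   "if (xr == yr && xi == yi)", while "x != y" uses || per De Morgan.
   The TRUTH_AND/TRUTH_OR here need not short-circuit, since both
   component comparisons have already been evaluated above.  */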

/* Process one statement.  If we identify a complex operation, expand it.  */

static void
expand_complex_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree rhs, type, inner_type;
  tree ac, ar, ai, bc, br, bi;
  enum tree_code code;

  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt)
	return;
      if (TREE_CODE (stmt) != MODIFY_EXPR)
	return;
      /* FALLTHRU */

    case MODIFY_EXPR:
      rhs = TREE_OPERAND (stmt, 1);
      break;

    case COND_EXPR:
      rhs = TREE_OPERAND (stmt, 0);
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  code = TREE_CODE (rhs);

  /* Initial filter for operations we handle.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
    case NEGATE_EXPR:
    case CONJ_EXPR:
      if (TREE_CODE (type) != COMPLEX_TYPE)
	return;
      inner_type = TREE_TYPE (type);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
      if (TREE_CODE (inner_type) != COMPLEX_TYPE)
	return;
      break;

    default:
      return;
    }

  /* Extract the components of the two complex values.  Make sure to
     handle the common case of the same value used twice specially.  */
  ac = TREE_OPERAND (rhs, 0);
  ar = extract_component (bsi, ac, 0);
  ai = extract_component (bsi, ac, 1);

  if (TREE_CODE_CLASS (code) == tcc_unary)
    bc = br = bi = NULL;
  else
    {
      bc = TREE_OPERAND (rhs, 1);
      if (ac == bc)
	br = ar, bi = ai;
      else
	{
	  br = extract_component (bsi, bc, 0);
	  bi = extract_component (bsi, bc, 1);
	}
    }

  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case MULT_EXPR:
      expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
      break;

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
      expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case NEGATE_EXPR:
      expand_complex_negation (bsi, inner_type, ar, ai);
      break;

    case CONJ_EXPR:
      expand_complex_conjugate (bsi, inner_type, ar, ai);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      expand_complex_comparison (bsi, ar, ai, br, bi, code);
      break;

    default:
      gcc_unreachable ();
    }
}

/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */

static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
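
/* For example, replicating VALUE = 0x7f across 8-bit units on a host with
   a 64-bit HOST_WIDE_INT: mask = 0xff, so ~0 / mask = 0x0101010101010101,
   and multiplying by (value & mask) = 0x7f yields 0x7f7f7f7f7f7f7f7f,
   i.e. 0x7f in every byte.  */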

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */

static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (block_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (block_stmt_iterator *bsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (bsi, code, inner_type, a);
}

static tree
do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (bsi, code, inner_type, a, b);
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ b) & 0x80808080)

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
	   ((a ^ ~b) & 0x80808080)

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */

static tree
do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
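
/* To see why the masking works, consider one byte lane of the addition:
   clearing the top bit of each operand guarantees that the low 7-bit sums
   cannot carry into the neighboring lane, and each lane's own top result
   bit is then reconstructed separately with the XOR.  E.g. for the lane
   a = 0xff, b = 0x01: (0x7f + 0x01) = 0x80 and (a ^ b) & 0x80 = 0x80, so
   the result is 0x80 ^ 0x80 = 0x00, matching 0xff + 0x01 modulo 256.  */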

static tree
do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
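
/* Checking the negation identity on single byte lanes:
   b = 0x01: (0x80 - 0x01) ^ (~b & 0x80) = 0x7f ^ 0x80 = 0xff = -1;
   b = 0x80: (0x80 - 0x00) ^ 0x00 = 0x80, i.e. -(-128) stays -128 in
   8-bit wraparound arithmetic, as expected.  */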

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */

static tree
expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  tree head, *chain = &head;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;

  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
    {
      tree result = f (bsi, inner_type, a, b, index, part_width, code);
      *chain = tree_cons (NULL_TREE, result, NULL_TREE);
      chain = &TREE_CHAIN (*chain);
    }

  return build1 (CONSTRUCTOR, type, head);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */

static tree
expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (bsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (bsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = gimplify_val (bsi, word_type, result);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
    }

  return build1 (VIEW_CONVERT_EXPR, type, result);
}
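
/* As an illustration (assuming a 32-bit target with no vector hardware):
   a bitwise AND on a 16-byte vector of chars takes the middle case, with
   n_words = 4 word-sized ANDs; a vector of SImode elements takes the
   first case, since its inner mode is already word_mode; and a 2-byte
   vector takes the last case, where one HImode scalar operation
   suffices.  */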

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */

static tree
expand_vector_addition (block_stmt_iterator *bsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (bsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (bsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}
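
/* Example: adding two 8-element vectors of chars with 32-bit words gives
   parts_per_word = 4 and 8 subparts, so the word-parallel path is taken;
   a 4-element vector of shorts under the same conditions has only
   parts_per_word = 2 and is expanded one element at a time instead.  */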

/* Return a type for the widest vector mode whose components are of mode
   INNER_MODE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
{
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
    mode = MIN_MODE_VECTOR_FLOAT;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& op->handlers[mode].insn_code != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return lang_hooks.types.type_for_mode (best_mode, 1);
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree *p_rhs, rhs, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op;

  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
	return;

      /* FALLTHRU */

    case MODIFY_EXPR:
      p_rhs = &TREE_OPERAND (stmt, 1);
      rhs = *p_rhs;
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  code = TREE_CODE (rhs);
  if (TREE_CODE_CLASS (code) != tcc_unary
      && TREE_CODE_CLASS (code) != tcc_binary)
    return;

  if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);
  op = optab_for_tree_code (code, type);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (TYPE_MODE (type) == BLKmode && op)
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
      if (vector_compute_type != NULL_TREE)
	compute_type = vector_compute_type;
    }

  compute_mode = TYPE_MODE (compute_type);

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
	  && op != NULL
	  && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
	return;
      else
	{
	  /* There is no operation in hardware, so fall back to scalars.  */
	  compute_type = TREE_TYPE (type);
	  compute_mode = TYPE_MODE (compute_type);
	}
    }

  /* If the compute mode is not a vector mode (hence we are decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may
     want to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (TYPE_TRAP_SIGNED (type))
	  break;

	*p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
					 TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case NEGATE_EXPR:
	if (TYPE_TRAP_SIGNED (type))
	  break;

	*p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
					 TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	*p_rhs = expand_vector_parallel (bsi, do_binop, type,
					 TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case BIT_NOT_EXPR:
	*p_rhs = expand_vector_parallel (bsi, do_unop, type,
					 TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      NULL_TREE, code);
  else
    *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      TREE_OPERAND (rhs, 1), code);

  modify_stmt (bsi_stmt (*bsi));
}

static void
expand_vector_operations (void)
{
  block_stmt_iterator bsi;
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	expand_vector_operations_1 (&bsi);
    }
}

static void
tree_lower_operations (void)
{
  int old_last_basic_block = last_basic_block;
  block_stmt_iterator bsi;
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      if (bb->index >= old_last_basic_block)
	continue;
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  expand_complex_operations_1 (&bsi);
	  expand_vector_operations_1 (&bsi);
	}
    }
}

struct tree_opt_pass pass_lower_vector_ssa =
{
  "vector",				/* name */
  NULL,					/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_rename_vars
    | TODO_ggc_collect | TODO_verify_ssa
    | TODO_verify_stmts
    | TODO_verify_flow,			/* todo_flags_finish */
  0					/* letter */
};

struct tree_opt_pass pass_pre_expand =
{
  "oplower",				/* name */
  0,					/* gate */
  tree_lower_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect
    | TODO_verify_stmts,		/* todo_flags_finish */
  0					/* letter */
};

#include "gt-tree-complex.h"