PR middle-end/19583
[official-gcc.git] / gcc / tree-complex.c
blob3373326aad9190355b1a0fc84738cd8b9751a056
1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
38 #include "ggc.h"
41 /* Extract the real or imaginary part of a complex variable or constant.
42 Make sure that it's a proper gimple_val and gimplify it if not.
43 Emit any new code before BSI. */
45 static tree
46 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
48 tree ret, inner_type;
/* The scalar component type of a COMPLEX_TYPE is its TREE_TYPE.  */
50 inner_type = TREE_TYPE (TREE_TYPE (t));
51 switch (TREE_CODE (t))
53 case COMPLEX_CST:
54 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
55 break;
57 case COMPLEX_EXPR:
/* Operand 0 of a COMPLEX_EXPR is the real part and operand 1 the
   imaginary part, so the boolean IMAGPART_P doubles as the index.  */
58 ret = TREE_OPERAND (t, imagpart_p);
59 break;
61 case VAR_DECL:
62 case PARM_DECL:
63 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
64 inner_type, t);
65 break;
67 default:
/* Callers only pass the operand forms handled above.  */
68 gcc_unreachable ();
/* Force RET into a gimple value, inserting any new statements
   before *BSI.  */
71 return gimplify_val (bsi, inner_type, ret);
74 /* Update an assignment to a complex variable in place. */
76 static void
77 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
79 tree stmt = bsi_stmt (*bsi);
80 tree type;
/* A "return expr;" statement wraps a MODIFY_EXPR to the result decl
   as its operand 0; rewrite that inner assignment.  */
82 if (TREE_CODE (stmt) == RETURN_EXPR)
83 stmt = TREE_OPERAND (stmt, 0);
/* Replace the RHS with COMPLEX_EXPR <R, I> of the original type and
   mark the statement modified so its operand caches are rescanned.  */
85 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
86 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
87 modify_stmt (stmt);
90 /* Expand complex addition to scalars:
91 a + b = (ar + br) + i(ai + bi)
92 a - b = (ar - br) + i(ai - bi)
   CODE is PLUS_EXPR or MINUS_EXPR; both operations act componentwise,
   so one routine handles them.  (The original comment's "i(ai + bi)"
   for subtraction was a typo -- the code applies CODE to both parts.)  */
95 static void
96 expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
97 tree ar, tree ai, tree br, tree bi,
98 enum tree_code code)
100 tree rr, ri;
/* Real and imaginary parts combine independently under +/-.  */
102 rr = gimplify_build2 (bsi, code, inner_type, ar, br);
103 ri = gimplify_build2 (bsi, code, inner_type, ai, bi);
105 update_complex_assignment (bsi, rr, ri);
108 /* Expand complex multiplication to scalars:
109 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
*/
112 static void
113 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
114 tree ar, tree ai, tree br, tree bi)
116 tree t1, t2, t3, t4, rr, ri;
118 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
119 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
120 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
122 /* Avoid expanding redundant multiplication for the common
123 case of squaring a complex number. */
/* If a == b then ai*br == ar*bi == T3, so T4 may reuse T3.  */
124 if (ar == br && ai == bi)
125 t4 = t3;
126 else
127 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
/* rr = ar*br - ai*bi; ri = ar*bi + ai*br.  */
129 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
130 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
132 update_complex_assignment (bsi, rr, ri);
135 /* Expand complex division to scalars, straightforward algorithm.
136 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
137 t = br*br + bi*bi
   CODE is the scalar division code to use (e.g. RDIV_EXPR for
   floating point).  */
140 static void
141 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
142 tree ar, tree ai, tree br, tree bi,
143 enum tree_code code)
145 tree rr, ri, div, t1, t2, t3;
/* div = |b|^2 = br*br + bi*bi.  */
147 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
148 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
149 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
/* Real part: (ar*br + ai*bi) / div.  */
151 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
152 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
153 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
154 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
/* Imaginary part: (ai*br - ar*bi) / div.  */
156 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
157 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
158 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
159 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
161 update_complex_assignment (bsi, rr, ri);
164 /* Expand complex division to scalars, modified algorithm to minimize
165 overflow with wide input ranges. */
/* This is Smith's algorithm: scale by the ratio of the smaller-magnitude
   denominator component to the larger one, choosing the branch at run
   time unless |br| < |bi| folds to a constant.  */
167 static void
168 expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
169 tree ar, tree ai, tree br, tree bi,
170 enum tree_code code)
172 tree rr, ri, ratio, div, t1, t2, tr, ti, cond;
173 basic_block bb_cond, bb_true, bb_false, bb_join;
175 /* Examine |br| < |bi|, and branch. */
176 t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
177 t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
178 cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
179 STRIP_NOPS (cond);
181 bb_cond = bb_true = bb_false = bb_join = NULL;
182 rr = ri = tr = ti = NULL;
/* If COND did not fold to a constant, build real control flow: split
   the block at the COND_EXPR and create THEN/ELSE blocks that both
   fall through to the join block.  */
183 if (!TREE_CONSTANT (cond))
185 edge e;
187 cond = build (COND_EXPR, void_type_node, cond, NULL, NULL);
188 bsi_insert_before (bsi, cond, BSI_SAME_STMT);
190 /* Split the original block, and create the TRUE and FALSE blocks. */
191 e = split_block (bsi->bb, cond);
192 bb_cond = e->src;
193 bb_join = e->dest;
194 bb_true = create_empty_bb (bb_cond);
195 bb_false = create_empty_bb (bb_true);
197 t1 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_true));
198 t2 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_false));
199 COND_EXPR_THEN (cond) = t1;
200 COND_EXPR_ELSE (cond) = t2;
202 /* Wire the blocks together. */
203 e->flags = EDGE_TRUE_VALUE;
204 redirect_edge_succ (e, bb_true);
205 make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
206 make_edge (bb_true, bb_join, EDGE_FALLTHRU);
207 make_edge (bb_false, bb_join, EDGE_FALLTHRU);
209 /* Update dominance info. Note that bb_join's data was
210 updated by split_block. */
211 if (dom_info_available_p (CDI_DOMINATORS))
213 set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
214 set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
/* Temporaries that carry the result out of each arm to the join
   block; the SSA renamer will turn them into proper SSA names.  */
217 rr = make_rename_temp (inner_type, NULL);
218 ri = make_rename_temp (inner_type, NULL);
221 /* In the TRUE branch, we compute
222 ratio = br/bi;
223 div = (br * ratio) + bi;
224 tr = (ar * ratio) + ai;
225 ti = (ai * ratio) - ar;
226 tr = tr / div;
227 ti = ti / div; */
/* Also taken unconditionally when COND folded to true.  */
228 if (bb_true || integer_nonzerop (cond))
230 if (bb_true)
/* Point *BSI into the empty TRUE block; the placeholder empty
   statement gives the iterator something to stand on.  */
232 *bsi = bsi_last (bb_true);
233 bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
236 ratio = gimplify_build2 (bsi, code, inner_type, br, bi);
238 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, ratio);
239 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, bi);
241 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
242 tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ai);
244 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
245 ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, ar);
247 tr = gimplify_build2 (bsi, code, inner_type, tr, div);
248 ti = gimplify_build2 (bsi, code, inner_type, ti, div);
/* Store the results into RR/RI and drop the placeholder statement.  */
250 if (bb_true)
252 t1 = build (MODIFY_EXPR, inner_type, rr, tr);
253 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
254 t1 = build (MODIFY_EXPR, inner_type, ri, ti);
255 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
256 bsi_remove (bsi);
260 /* In the FALSE branch, we compute
261 ratio = d/c;
262 divisor = (d * ratio) + c;
263 tr = (b * ratio) + a;
264 ti = b - (a * ratio);
265 tr = tr / div;
266 ti = ti / div; */
/* Here c = br, d = bi, a = ar, b = ai -- the mirror of the TRUE arm,
   with the ratio taken as bi/br.  Also taken unconditionally when
   COND folded to false.  */
267 if (bb_false || integer_zerop (cond))
269 if (bb_false)
271 *bsi = bsi_last (bb_false);
272 bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
275 ratio = gimplify_build2 (bsi, code, inner_type, bi, br);
277 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, ratio);
278 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, br);
280 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
281 tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ar);
283 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
284 ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
286 tr = gimplify_build2 (bsi, code, inner_type, tr, div);
287 ti = gimplify_build2 (bsi, code, inner_type, ti, div);
289 if (bb_false)
291 t1 = build (MODIFY_EXPR, inner_type, rr, tr);
292 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
293 t1 = build (MODIFY_EXPR, inner_type, ri, ti);
294 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
295 bsi_remove (bsi);
/* If we branched, continue at the join block; otherwise only one arm
   ran and TR/TI hold the result directly.  */
299 if (bb_join)
300 *bsi = bsi_start (bb_join);
301 else
302 rr = tr, ri = ti;
304 update_complex_assignment (bsi, rr, ri);
307 /* Expand complex division to scalars. */
/* Dispatch on -mcomplex-divide-method (flag_complex_divide_method):
   0 = straightforward textbook formula, 1 = overflow-resistant wide
   algorithm.  CODE is the scalar division tree code.  */
309 static void
310 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
311 tree ar, tree ai, tree br, tree bi,
312 enum tree_code code)
314 switch (flag_complex_divide_method)
316 case 0:
317 /* straightforward implementation of complex divide acceptable. */
318 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
319 break;
320 case 1:
321 /* wide ranges of inputs must work for complex divide. */
322 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
323 break;
324 default:
325 /* C99-like requirements for complex divide (not yet implemented). */
326 gcc_unreachable ();
330 /* Expand complex negation to scalars:
331 -a = (-ar) + i(-ai)
*/
334 static void
335 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
336 tree ar, tree ai)
338 tree rr, ri;
/* Negate each component independently.  */
340 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
341 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
343 update_complex_assignment (bsi, rr, ri);
346 /* Expand complex conjugate to scalars:
347 ~a = (ar) + i(-ai)
*/
350 static void
351 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
352 tree ar, tree ai)
354 tree ri;
/* The real part is unchanged; only the imaginary part is negated.  */
356 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
358 update_complex_assignment (bsi, ar, ri);
361 /* Expand complex comparison (EQ or NE only). */
/* a == b  <=>  (ar == br) && (ai == bi)
   a != b  <=>  (ar != br) || (ai != bi)
   The combined boolean replaces the comparison wherever it appeared:
   as a MODIFY_EXPR rhs, inside a RETURN_EXPR, or as a COND_EXPR
   condition.  */
363 static void
364 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
365 tree br, tree bi, enum tree_code code)
367 tree cr, ci, cc, stmt, expr, type;
/* Compare the parts, then AND for equality / OR for inequality.  */
369 cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
370 ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
371 cc = gimplify_build2 (bsi,
372 (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
373 boolean_type_node, cr, ci);
375 stmt = expr = bsi_stmt (*bsi);
377 switch (TREE_CODE (stmt))
379 case RETURN_EXPR:
/* The RETURN_EXPR wraps a MODIFY_EXPR; rewrite that.  */
380 expr = TREE_OPERAND (stmt, 0);
381 /* FALLTHRU */
382 case MODIFY_EXPR:
/* Convert the boolean to the type the assignment expects.  */
383 type = TREE_TYPE (TREE_OPERAND (expr, 1));
384 TREE_OPERAND (expr, 1) = fold_convert (type, cc);
385 break;
386 case COND_EXPR:
387 TREE_OPERAND (stmt, 0) = cc;
388 break;
389 default:
390 gcc_unreachable ();
393 modify_stmt (stmt);
396 /* Process one statement. If we identify a complex operation, expand it. */
398 static void
399 expand_complex_operations_1 (block_stmt_iterator *bsi)
401 tree stmt = bsi_stmt (*bsi);
402 tree rhs, type, inner_type;
403 tree ac, ar, ai, bc, br, bi;
404 enum tree_code code;
/* Find the expression of interest: the RHS of an assignment (possibly
   wrapped in a RETURN_EXPR) or the condition of a COND_EXPR.  */
406 switch (TREE_CODE (stmt))
408 case RETURN_EXPR:
409 stmt = TREE_OPERAND (stmt, 0);
410 if (!stmt)
411 return;
412 if (TREE_CODE (stmt) != MODIFY_EXPR)
413 return;
414 /* FALLTHRU */
416 case MODIFY_EXPR:
417 rhs = TREE_OPERAND (stmt, 1);
418 break;
420 case COND_EXPR:
421 rhs = TREE_OPERAND (stmt, 0);
422 break;
424 default:
425 return;
428 type = TREE_TYPE (rhs);
429 code = TREE_CODE (rhs);
431 /* Initial filter for operations we handle. */
432 switch (code)
434 case PLUS_EXPR:
435 case MINUS_EXPR:
436 case MULT_EXPR:
437 case TRUNC_DIV_EXPR:
438 case CEIL_DIV_EXPR:
439 case FLOOR_DIV_EXPR:
440 case ROUND_DIV_EXPR:
441 case RDIV_EXPR:
442 case NEGATE_EXPR:
443 case CONJ_EXPR:
/* Arithmetic: the expression itself must be of complex type.  */
444 if (TREE_CODE (type) != COMPLEX_TYPE)
445 return;
446 inner_type = TREE_TYPE (type);
447 break;
449 case EQ_EXPR:
450 case NE_EXPR:
/* Comparisons yield a boolean, so test the operand's type instead.  */
451 inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
452 if (TREE_CODE (inner_type) != COMPLEX_TYPE)
453 return;
454 break;
456 default:
457 return;
460 /* Extract the components of the two complex values. Make sure and
461 handle the common case of the same value used twice specially. */
462 ac = TREE_OPERAND (rhs, 0);
463 ar = extract_component (bsi, ac, 0);
464 ai = extract_component (bsi, ac, 1);
/* Unary ops (NEGATE, CONJ) have no second operand.  */
466 if (TREE_CODE_CLASS (code) == tcc_unary)
467 bc = br = bi = NULL;
468 else
470 bc = TREE_OPERAND (rhs, 1);
/* Reuse A's components for B when both operands are the same tree;
   this also lets expand_complex_multiplication spot squaring.  */
471 if (ac == bc)
472 br = ar, bi = ai;
473 else
475 br = extract_component (bsi, bc, 0);
476 bi = extract_component (bsi, bc, 1);
/* Dispatch to the per-operation expander; each one rewrites the
   statement at *BSI in place.  */
480 switch (code)
482 case PLUS_EXPR:
483 case MINUS_EXPR:
484 expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
485 break;
487 case MULT_EXPR:
488 expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
489 break;
491 case TRUNC_DIV_EXPR:
492 case CEIL_DIV_EXPR:
493 case FLOOR_DIV_EXPR:
494 case ROUND_DIV_EXPR:
495 case RDIV_EXPR:
496 expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
497 break;
499 case NEGATE_EXPR:
500 expand_complex_negation (bsi, inner_type, ar, ai);
501 break;
503 case CONJ_EXPR:
504 expand_complex_conjugate (bsi, inner_type, ar, ai);
505 break;
507 case EQ_EXPR:
508 case NE_EXPR:
509 expand_complex_comparison (bsi, ar, ai, br, bi, code);
510 break;
512 default:
513 gcc_unreachable ();
517 /* Build a constant of type TYPE, made of VALUE's bits replicated
518 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
519 static tree
520 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
522 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
523 int n = HOST_BITS_PER_WIDE_INT / width;
524 unsigned HOST_WIDE_INT low, high, mask;
525 tree ret;
/* WIDTH must not exceed HOST_BITS_PER_WIDE_INT, or N would be 0.  */
527 gcc_assert (n);
529 if (width == HOST_BITS_PER_WIDE_INT)
530 low = value;
531 else
/* Multiplying (VALUE & MASK) by ~0/MASK (= 0x...010101 for 8-bit
   elements) replicates the element into every WIDTH-bit lane.  */
533 mask = ((HOST_WIDE_INT)1 << width) - 1;
534 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
/* Widen or truncate the replicated word to TYPE's precision; only
   1x and 2x HOST_WIDE_INT precisions are supported.  */
537 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
538 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
539 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
540 high = 0;
541 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
542 high = low;
543 else
544 gcc_unreachable ();
546 ret = build_int_cst_wide (type, low, high);
547 return ret;
/* Cached scalar type of mode "word_mode", plus a one-entry cache of the
   last word-mode vector type built, keyed by its unit count.  GTY(())
   marks them as garbage-collector roots so the cached trees survive.  */
550 static GTY(()) tree vector_inner_type;
551 static GTY(()) tree vector_last_type;
552 static GTY(()) int vector_last_nunits;
554 /* Return a suitable vector type made of SUBPARTS units each of mode
555 "word_mode" (the global variable). */
556 static tree
557 build_word_mode_vector_type (int nunits)
/* Lazily create the word-mode scalar element type on first use;
   otherwise check the one-entry cache.  */
559 if (!vector_inner_type)
560 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
561 else if (vector_last_nunits == nunits)
563 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
564 return vector_last_type;
567 /* We build a new type, but we canonicalize it nevertheless,
568 because it still saves some memory. */
569 vector_last_nunits = nunits;
570 vector_last_type = type_hash_canon (nunits,
571 build_vector_type (vector_inner_type,
572 nunits));
573 return vector_last_type;
/* Callback computing one piece of a lowered vector operation:
   (bsi, inner_type, a, b, bitpos, bitsize, code) -> result tree.
   B is unused for unary operations; BITPOS/BITSIZE select the piece
   of A/B to operate on (NULL means the whole object).  */
576 typedef tree (*elem_op_func) (block_stmt_iterator *,
577 tree, tree, tree, tree, tree, enum tree_code);
/* Extract BITSIZE bits at BITPOS from vector T as a value of TYPE.
   A NULL BITPOS means "the whole object": reinterpret T via
   VIEW_CONVERT_EXPR instead of slicing it.  */
579 static inline tree
580 tree_vec_extract (block_stmt_iterator *bsi, tree type,
581 tree t, tree bitsize, tree bitpos)
583 if (bitpos)
584 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
585 else
586 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
/* elem_op_func applying unary CODE to one INNER_TYPE-sized piece of A.  */
589 static tree
590 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
591 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
592 enum tree_code code)
594 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
595 return gimplify_build1 (bsi, code, inner_type, a);
/* elem_op_func applying binary CODE to matching INNER_TYPE-sized
   pieces of A and B.  */
598 static tree
599 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
600 tree bitpos, tree bitsize, enum tree_code code)
602 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
603 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
604 return gimplify_build2 (bsi, code, inner_type, a, b);
607 /* Expand vector addition to scalars. This does bit twiddling
608 in order to increase parallelism:
610 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
611 (a ^ b) & 0x80808080
613 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
614 (a ^ ~b) & 0x80808080
616 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
618 This optimization should be done only if 4 vector items or more
619 fit into a word. */
/* NOTE(review): BITPOS and BITSIZE are marked ATTRIBUTE_UNUSED but are
   in fact used below in the tree_vec_extract calls -- the markers are
   stale (they only suppress warnings, so this is harmless).  */
620 static tree
621 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
622 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
623 enum tree_code code)
625 tree inner_type = TREE_TYPE (TREE_TYPE (a));
626 unsigned HOST_WIDE_INT max;
627 tree low_bits, high_bits, a_low, b_low, result_low, signs;
/* LOW_BITS = 0x7f7f... and HIGH_BITS = 0x8080... patterns, replicated
   per element of the original vector's inner type.  */
629 max = GET_MODE_MASK (TYPE_MODE (inner_type));
630 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
631 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
633 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
634 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
/* SIGNS tracks where the element sign bits of the true result differ
   from those of the carry-suppressed word operation below.  */
636 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
637 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
638 if (code == PLUS_EXPR)
639 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
640 else
/* For subtraction, setting the high bits of A prevents borrows from
   crossing element boundaries; the sign fixup uses a ^ ~b.  */
642 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
643 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
646 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
647 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
648 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
651 static tree
652 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
653 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
654 tree bitsize ATTRIBUTE_UNUSED,
655 enum tree_code code ATTRIBUTE_UNUSED)
657 tree inner_type = TREE_TYPE (TREE_TYPE (b));
658 HOST_WIDE_INT max;
659 tree low_bits, high_bits, b_low, result_low, signs;
661 max = GET_MODE_MASK (TYPE_MODE (inner_type));
662 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
663 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
665 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
667 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
668 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
669 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
670 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
671 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
674 /* Expand a vector operation to scalars, by using many operations
675 whose type is the vector type's inner type. */
676 static tree
677 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
678 tree type, tree inner_type,
679 tree a, tree b, enum tree_code code)
681 tree head, *chain = &head;
682 tree part_width = TYPE_SIZE (inner_type);
683 tree index = bitsize_int (0);
684 int nunits = TYPE_VECTOR_SUBPARTS (type);
/* DELTA is how many elements of the original vector each call to F
   consumes (> 1 when INNER_TYPE is wider than the element type).  */
685 int delta = tree_low_cst (part_width, 1)
686 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
687 int i;
/* Apply F to each INNER_TYPE-sized slice, advancing the bit INDEX by
   PART_WIDTH each time, and chain the results into a TREE_LIST.  */
689 for (i = 0; i < nunits;
690 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
692 tree result = f (bsi, inner_type, a, b, index, part_width, code);
693 *chain = tree_cons (NULL_TREE, result, NULL_TREE);
694 chain = &TREE_CHAIN (*chain);
/* Reassemble the pieces into a vector value.  */
697 return build1 (CONSTRUCTOR, type, head);
700 /* Expand a vector operation to scalars with the freedom to use
701 a scalar integer type, or to use a different size for the items
702 in the vector type. */
703 static tree
704 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
705 tree a, tree b,
706 enum tree_code code)
708 tree result, compute_type;
709 enum machine_mode mode;
710 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
712 /* We have three strategies. If the type is already correct, just do
713 the operation an element at a time. Else, if the vector is wider than
714 one word, do it a word at a time; finally, if the vector is smaller
715 than one word, do it as a scalar. */
716 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
717 return expand_vector_piecewise (bsi, f,
718 type, TREE_TYPE (type),
719 a, b, code);
720 else if (n_words > 1)
/* Reinterpret the vector as N_WORDS word-mode units and operate on
   one word at a time.  */
722 tree word_type = build_word_mode_vector_type (n_words);
723 result = expand_vector_piecewise (bsi, f,
724 word_type, TREE_TYPE (word_type),
725 a, b, code);
726 result = gimplify_val (bsi, word_type, result);
728 else
730 /* Use a single scalar operation with a mode no wider than word_mode. */
731 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
732 compute_type = lang_hooks.types.type_for_mode (mode, 1);
733 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
/* Reinterpret the integer/word result back as the vector type.  */
736 return build1 (VIEW_CONVERT_EXPR, type, result);
739 /* Expand a vector operation to scalars; for integer types we can use
740 special bit twiddling tricks to do the sums a word at a time, using
741 function F_PARALLEL instead of F. These tricks are done only if
742 they can process at least four items, that is, only if the vector
743 holds at least four items and if a word can hold four items. */
744 static tree
745 expand_vector_addition (block_stmt_iterator *bsi,
746 elem_op_func f, elem_op_func f_parallel,
747 tree type, tree a, tree b, enum tree_code code)
/* How many vector elements fit in one machine word.  */
749 int parts_per_word = UNITS_PER_WORD
750 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
752 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
753 && parts_per_word >= 4
754 && TYPE_VECTOR_SUBPARTS (type) >= 4)
755 return expand_vector_parallel (bsi, f_parallel,
756 type, a, b, code);
757 else
758 return expand_vector_piecewise (bsi, f,
759 type, TREE_TYPE (type),
760 a, b, code);
763 /* Return a type for the widest vector mode whose components are of mode
764 INNER_MODE, or NULL_TREE if none is found.  Only modes for which
   optab OP has a handler (i.e. the target implements the operation)
   are considered.  */
765 static tree
766 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
768 enum machine_mode best_mode = VOIDmode, mode;
769 int best_nunits = 0;
/* Start from the narrowest vector mode of the right class and walk
   toward wider ones, remembering the widest supported match.  */
771 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
772 mode = MIN_MODE_VECTOR_FLOAT;
773 else
774 mode = MIN_MODE_VECTOR_INT;
776 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
777 if (GET_MODE_INNER (mode) == inner_mode
778 && GET_MODE_NUNITS (mode) > best_nunits
779 && op->handlers[mode].insn_code != CODE_FOR_nothing)
780 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
782 if (best_mode == VOIDmode)
783 return NULL_TREE;
784 else
785 return lang_hooks.types.type_for_mode (best_mode, 1);
788 /* Process one statement. If we identify a vector operation, expand it. */
790 static void
791 expand_vector_operations_1 (block_stmt_iterator *bsi)
793 tree stmt = bsi_stmt (*bsi);
794 tree *p_rhs, rhs, type, compute_type;
795 enum tree_code code;
796 enum machine_mode compute_mode;
797 optab op;
/* Find the RHS of the assignment (possibly inside a RETURN_EXPR);
   keep a pointer to it so it can be overwritten in place.  */
799 switch (TREE_CODE (stmt))
801 case RETURN_EXPR:
802 stmt = TREE_OPERAND (stmt, 0);
803 if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
804 return;
806 /* FALLTHRU */
808 case MODIFY_EXPR:
809 p_rhs = &TREE_OPERAND (stmt, 1);
810 rhs = *p_rhs;
811 break;
813 default:
814 return;
817 type = TREE_TYPE (rhs);
818 if (TREE_CODE (type) != VECTOR_TYPE)
819 return;
/* Only plain unary/binary arithmetic is lowered here; conversions
   and views are left alone.  */
821 code = TREE_CODE (rhs);
822 if (TREE_CODE_CLASS (code) != tcc_unary
823 && TREE_CODE_CLASS (code) != tcc_binary)
824 return;
826 if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
827 return;
829 gcc_assert (code != CONVERT_EXPR);
830 op = optab_for_tree_code (code, type);
832 /* Optabs will try converting a negation into a subtraction, so
833 look for it as well. TODO: negation of floating-point vectors
834 might be turned into an exclusive OR toggling the sign bit. */
835 if (op == NULL
836 && code == NEGATE_EXPR
837 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
838 op = optab_for_tree_code (MINUS_EXPR, type);
840 /* For very wide vectors, try using a smaller vector mode. */
841 compute_type = type;
842 if (TYPE_MODE (type) == BLKmode && op)
844 tree vector_compute_type
845 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
846 if (vector_compute_type != NULL_TREE)
847 compute_type = vector_compute_type;
850 compute_mode = TYPE_MODE (compute_type);
852 /* If we are breaking a BLKmode vector into smaller pieces,
853 type_for_widest_vector_mode has already looked into the optab,
854 so skip these checks. */
855 if (compute_type == type)
/* If the target supports the operation on the whole vector mode,
   there is nothing to lower -- leave the statement alone.  */
857 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
858 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
859 && op != NULL
860 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
861 return;
862 else
864 /* There is no operation in hardware, so fall back to scalars. */
865 compute_type = TREE_TYPE (type);
866 compute_mode = TYPE_MODE (compute_type);
870 /* If the compute mode is not a vector mode (hence we are decomposing
871 a BLKmode vector to smaller, hardware-supported vectors), we may
872 want to expand the operations in parallel. */
873 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
874 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
875 switch (code)
877 case PLUS_EXPR:
878 case MINUS_EXPR:
/* The word-parallel bit tricks would hide overflow, so skip them
   for trap-on-overflow types and fall through to piecewise.  */
879 if (TYPE_TRAP_SIGNED (type))
880 break;
882 *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
883 TREE_OPERAND (rhs, 0),
884 TREE_OPERAND (rhs, 1), code);
885 modify_stmt (bsi_stmt (*bsi));
886 return;
888 case NEGATE_EXPR:
889 if (TYPE_TRAP_SIGNED (type))
890 break;
892 *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
893 TREE_OPERAND (rhs, 0),
894 NULL_TREE, code);
895 modify_stmt (bsi_stmt (*bsi));
896 return;
898 case BIT_AND_EXPR:
899 case BIT_IOR_EXPR:
900 case BIT_XOR_EXPR:
/* Bitwise ops never carry between elements; whole words are safe.  */
901 *p_rhs = expand_vector_parallel (bsi, do_binop, type,
902 TREE_OPERAND (rhs, 0),
903 TREE_OPERAND (rhs, 1), code);
904 modify_stmt (bsi_stmt (*bsi));
905 return;
907 case BIT_NOT_EXPR:
908 *p_rhs = expand_vector_parallel (bsi, do_unop, type,
909 TREE_OPERAND (rhs, 0),
910 NULL_TREE, code);
911 modify_stmt (bsi_stmt (*bsi));
912 return;
914 default:
915 break;
/* Generic fallback: expand piecewise in COMPUTE_TYPE units.  */
918 if (TREE_CODE_CLASS (code) == tcc_unary)
919 *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
920 TREE_OPERAND (rhs, 0),
921 NULL_TREE, code);
922 else
923 *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
924 TREE_OPERAND (rhs, 0),
925 TREE_OPERAND (rhs, 1), code);
927 modify_stmt (bsi_stmt (*bsi));
/* Pass entry point: lower vector operations in every statement of
   every basic block.  */
930 static void
931 expand_vector_operations (void)
933 block_stmt_iterator bsi;
934 basic_block bb;
936 FOR_EACH_BB (bb)
938 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
939 expand_vector_operations_1 (&bsi);
/* Pass entry point: lower both complex and vector operations in one
   walk over the CFG.  */
943 static void
944 tree_lower_operations (void)
946 int old_last_basic_block = last_basic_block;
947 block_stmt_iterator bsi;
948 basic_block bb;
950 FOR_EACH_BB (bb)
/* Skip blocks created during this walk (e.g. by the branching
   expand_complex_div_wide); their statements are already lowered.  */
952 if (bb->index >= old_last_basic_block)
953 continue;
954 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
956 expand_complex_operations_1 (&bsi);
957 expand_vector_operations_1 (&bsi);
/* Pass descriptor for vector lowering on SSA form; always runs (no
   gate) and requires only a CFG.  */
963 struct tree_opt_pass pass_lower_vector_ssa =
965 "vector", /* name */
966 NULL, /* gate */
967 expand_vector_operations, /* execute */
968 NULL, /* sub */
969 NULL, /* next */
970 0, /* static_pass_number */
971 0, /* tv_id */
972 PROP_cfg, /* properties_required */
973 0, /* properties_provided */
974 0, /* properties_destroyed */
975 0, /* todo_flags_start */
976 TODO_dump_func | TODO_rename_vars /* todo_flags_finish */
977 | TODO_ggc_collect | TODO_verify_ssa
978 | TODO_verify_stmts | TODO_verify_flow,
979 0 /* letter */
/* Pass descriptor for the pre-expansion lowering of both complex and
   vector operations (runs outside SSA, hence no TODO_rename_vars).  */
982 struct tree_opt_pass pass_pre_expand =
984 "oplower", /* name */
985 0, /* gate */
986 tree_lower_operations, /* execute */
987 NULL, /* sub */
988 NULL, /* next */
989 0, /* static_pass_number */
990 0, /* tv_id */
991 PROP_cfg, /* properties_required */
992 0, /* properties_provided */
993 0, /* properties_destroyed */
994 0, /* todo_flags_start */
995 TODO_dump_func | TODO_ggc_collect
996 | TODO_verify_stmts, /* todo_flags_finish */
997 0 /* letter */
1000 #include "gt-tree-complex.h"