* config/i386/i386.c (ix86_expand_prologue): If the function uses a
[official-gcc.git] / gcc / tree-complex.c
bloba960f92c42427b76bfa17426a978bffa9a6a103b
1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
40 /* Extract the real or imaginary part of a complex variable or constant.
41 Make sure that it's a proper gimple_val and gimplify it if not.
42 Emit any new code before BSI. */
44 static tree
45 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
47 tree ret, inner_type;
49 inner_type = TREE_TYPE (TREE_TYPE (t));
50 switch (TREE_CODE (t))
52 case COMPLEX_CST:
53 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
54 break;
56 case COMPLEX_EXPR:
57 ret = TREE_OPERAND (t, imagpart_p);
58 break;
60 case VAR_DECL:
61 case PARM_DECL:
62 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
63 inner_type, t);
64 break;
66 default:
67 abort ();
70 return gimplify_val (bsi, inner_type, ret);
73 /* Update an assignment to a complex variable in place. */
75 static void
76 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
78 tree stmt = bsi_stmt (*bsi);
79 tree type;
81 modify_stmt (stmt);
82 if (TREE_CODE (stmt) == RETURN_EXPR)
83 stmt = TREE_OPERAND (stmt, 0);
85 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
86 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
89 /* Expand complex addition to scalars:
90 a + b = (ar + br) + i(ai + bi)
91 a - b = (ar - br) + i(ai + bi)
94 static void
95 expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
96 tree ar, tree ai, tree br, tree bi,
97 enum tree_code code)
99 tree rr, ri;
101 rr = gimplify_build2 (bsi, code, inner_type, ar, br);
102 ri = gimplify_build2 (bsi, code, inner_type, ai, bi);
104 update_complex_assignment (bsi, rr, ri);
107 /* Expand complex multiplication to scalars:
108 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
111 static void
112 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
113 tree ar, tree ai, tree br, tree bi)
115 tree t1, t2, t3, t4, rr, ri;
117 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
118 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
119 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
121 /* Avoid expanding redundant multiplication for the common
122 case of squaring a complex number. */
123 if (ar == br && ai == bi)
124 t4 = t3;
125 else
126 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
128 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
129 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
131 update_complex_assignment (bsi, rr, ri);
134 /* Expand complex division to scalars, straightforward algorithm.
135 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
136 t = br*br + bi*bi
139 static void
140 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
141 tree ar, tree ai, tree br, tree bi,
142 enum tree_code code)
144 tree rr, ri, div, t1, t2, t3;
146 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
147 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
148 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
150 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
151 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
152 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
153 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
155 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
156 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
157 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
158 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
160 update_complex_assignment (bsi, rr, ri);
163 /* Expand complex division to scalars, modified algorithm to minimize
164 overflow with wide input ranges. */
166 static void
167 expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
168 tree ar, tree ai, tree br, tree bi,
169 enum tree_code code)
171 tree rr, ri, ratio, div, t1, t2, min, max, cond;
173 /* Examine |br| < |bi|, and branch. */
174 t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
175 t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
176 cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
177 STRIP_NOPS (cond);
179 if (TREE_CONSTANT (cond))
181 if (integer_zerop (cond))
182 min = bi, max = br;
183 else
184 min = br, max = bi;
186 else
188 basic_block bb_cond, bb_true, bb_false, bb_join;
189 tree l1, l2, l3;
190 edge e;
192 l1 = create_artificial_label ();
193 t1 = build (GOTO_EXPR, void_type_node, l1);
194 l2 = create_artificial_label ();
195 t2 = build (GOTO_EXPR, void_type_node, l2);
196 cond = build (COND_EXPR, void_type_node, cond, t1, t2);
197 bsi_insert_before (bsi, cond, BSI_SAME_STMT);
199 min = make_rename_temp (inner_type, NULL);
200 max = make_rename_temp (inner_type, NULL);
201 l3 = create_artificial_label ();
203 /* Split the original block, and create the TRUE and FALSE blocks. */
204 e = split_block (bsi->bb, cond);
205 bb_cond = e->src;
206 bb_join = e->dest;
207 bb_true = create_empty_bb (bb_cond);
208 bb_false = create_empty_bb (bb_true);
210 /* Wire the blocks together. */
211 e->flags = EDGE_TRUE_VALUE;
212 redirect_edge_succ (e, bb_true);
213 make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
214 make_edge (bb_true, bb_join, 0);
215 make_edge (bb_false, bb_join, 0);
217 /* Update dominance info. Note that bb_join's data was
218 updated by split_block. */
219 if (dom_computed[CDI_DOMINATORS] >= DOM_CONS_OK)
221 set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
222 set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
225 /* Compute min and max for TRUE block. */
226 *bsi = bsi_start (bb_true);
227 t1 = build (LABEL_EXPR, void_type_node, l1);
228 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
229 t1 = build (MODIFY_EXPR, inner_type, min, br);
230 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
231 t1 = build (MODIFY_EXPR, inner_type, max, bi);
232 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
234 /* Compute min and max for FALSE block. */
235 *bsi = bsi_start (bb_false);
236 t1 = build (LABEL_EXPR, void_type_node, l2);
237 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
238 t1 = build (MODIFY_EXPR, inner_type, min, bi);
239 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
240 t1 = build (MODIFY_EXPR, inner_type, max, br);
241 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
243 /* Insert the join label into the tail of the original block. */
244 *bsi = bsi_start (bb_join);
245 t1 = build (LABEL_EXPR, void_type_node, l3);
246 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
249 /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|). We now use the
250 ratio min/max to scale both the dividend and divisor. */
251 ratio = gimplify_build2 (bsi, code, inner_type, min, max);
253 /* Calculate the divisor: min*ratio + max. */
254 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, min, ratio);
255 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, max);
257 /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div). */
258 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
259 t2 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, ar, t1);
260 rr = gimplify_build2 (bsi, code, inner_type, t2, div);
262 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
263 t2 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
264 ri = gimplify_build2 (bsi, code, inner_type, t2, div);
266 update_complex_assignment (bsi, rr, ri);
269 /* Expand complex division to scalars. */
271 static void
272 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
273 tree ar, tree ai, tree br, tree bi,
274 enum tree_code code)
276 switch (flag_complex_divide_method)
278 case 0:
279 /* straightforward implementation of complex divide acceptable. */
280 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
281 break;
282 case 1:
283 /* wide ranges of inputs must work for complex divide. */
284 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
285 break;
286 default:
287 /* C99-like requirements for complex divide (not yet implemented). */
288 abort ();
292 /* Expand complex negation to scalars:
293 -a = (-ar) + i(-ai)
296 static void
297 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
298 tree ar, tree ai)
300 tree rr, ri;
302 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
303 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
305 update_complex_assignment (bsi, rr, ri);
308 /* Expand complex conjugate to scalars:
309 ~a = (ar) + i(-ai)
312 static void
313 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
314 tree ar, tree ai)
316 tree ri;
318 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
320 update_complex_assignment (bsi, ar, ri);
323 /* Expand complex comparison (EQ or NE only). */
325 static void
326 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
327 tree br, tree bi, enum tree_code code)
329 tree cr, ci, cc, stmt, type;
331 cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
332 ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
333 cc = gimplify_build2 (bsi,
334 (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
335 boolean_type_node, cr, ci);
337 stmt = bsi_stmt (*bsi);
338 modify_stmt (stmt);
340 switch (TREE_CODE (stmt))
342 case RETURN_EXPR:
343 stmt = TREE_OPERAND (stmt, 0);
344 /* FALLTHRU */
345 case MODIFY_EXPR:
346 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
347 TREE_OPERAND (stmt, 1) = fold_convert (type, cc);
348 break;
349 case COND_EXPR:
350 TREE_OPERAND (stmt, 0) = cc;
351 break;
352 default:
353 abort ();
357 /* Process one statement. If we identify a complex operation, expand it. */
359 static void
360 expand_complex_operations_1 (block_stmt_iterator *bsi)
362 tree stmt = bsi_stmt (*bsi);
363 tree rhs, type, inner_type;
364 tree ac, ar, ai, bc, br, bi;
365 enum tree_code code;
367 switch (TREE_CODE (stmt))
369 case RETURN_EXPR:
370 stmt = TREE_OPERAND (stmt, 0);
371 if (!stmt)
372 return;
373 if (TREE_CODE (stmt) != MODIFY_EXPR)
374 return;
375 /* FALLTHRU */
377 case MODIFY_EXPR:
378 rhs = TREE_OPERAND (stmt, 1);
379 break;
381 case COND_EXPR:
382 rhs = TREE_OPERAND (stmt, 0);
383 break;
385 default:
386 return;
389 type = TREE_TYPE (rhs);
390 code = TREE_CODE (rhs);
392 /* Initial filter for operations we handle. */
393 switch (code)
395 case PLUS_EXPR:
396 case MINUS_EXPR:
397 case MULT_EXPR:
398 case TRUNC_DIV_EXPR:
399 case CEIL_DIV_EXPR:
400 case FLOOR_DIV_EXPR:
401 case ROUND_DIV_EXPR:
402 case RDIV_EXPR:
403 case NEGATE_EXPR:
404 case CONJ_EXPR:
405 if (TREE_CODE (type) != COMPLEX_TYPE)
406 return;
407 inner_type = TREE_TYPE (type);
408 break;
410 case EQ_EXPR:
411 case NE_EXPR:
412 inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
413 if (TREE_CODE (inner_type) != COMPLEX_TYPE)
414 return;
415 break;
417 default:
418 return;
421 /* Extract the components of the two complex values. Make sure and
422 handle the common case of the same value used twice specially. */
423 ac = TREE_OPERAND (rhs, 0);
424 ar = extract_component (bsi, ac, 0);
425 ai = extract_component (bsi, ac, 1);
427 if (TREE_CODE_CLASS (code) == '1')
428 bc = br = bi = NULL;
429 else
431 bc = TREE_OPERAND (rhs, 1);
432 if (ac == bc)
433 br = ar, bi = ai;
434 else
436 br = extract_component (bsi, bc, 0);
437 bi = extract_component (bsi, bc, 1);
441 switch (code)
443 case PLUS_EXPR:
444 case MINUS_EXPR:
445 expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
446 break;
448 case MULT_EXPR:
449 expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
450 break;
452 case TRUNC_DIV_EXPR:
453 case CEIL_DIV_EXPR:
454 case FLOOR_DIV_EXPR:
455 case ROUND_DIV_EXPR:
456 case RDIV_EXPR:
457 expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
458 break;
460 case NEGATE_EXPR:
461 expand_complex_negation (bsi, inner_type, ar, ai);
462 break;
464 case CONJ_EXPR:
465 expand_complex_conjugate (bsi, inner_type, ar, ai);
466 break;
468 case EQ_EXPR:
469 case NE_EXPR:
470 expand_complex_comparison (bsi, ar, ai, br, bi, code);
471 break;
473 default:
474 abort ();
478 /* Build a constant of type TYPE, made of VALUE's bits replicated
479 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
480 static tree
481 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
483 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
484 int n = HOST_BITS_PER_WIDE_INT / width;
485 unsigned HOST_WIDE_INT low, high, mask;
486 tree ret;
488 if (n == 0)
489 abort ();
491 if (width == HOST_BITS_PER_WIDE_INT)
492 low = value;
493 else
495 mask = ((HOST_WIDE_INT)1 << width) - 1;
496 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
499 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
500 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
501 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
502 high = 0;
503 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
504 high = low;
505 else
506 abort ();
508 ret = build_int_2 (low, high);
509 TREE_TYPE (ret) = type;
510 return ret;
513 /* Return a suitable vector types made of SUBPARTS units each of mode
514 "word_mode" (the global variable). */
515 static tree
516 build_word_mode_vector_type (int nunits)
518 static tree innertype;
519 static tree last;
520 static int last_nunits;
522 if (!innertype)
523 innertype = lang_hooks.types.type_for_mode (word_mode, 1);
524 else if (last_nunits == nunits)
525 return last;
527 /* We build a new type, but we canonicalize it nevertheless,
528 because it still saves some memory. */
529 last_nunits = nunits;
530 last = type_hash_canon (nunits, build_vector_type (innertype, nunits));
531 return last;
534 typedef tree (*elem_op_func) (block_stmt_iterator *,
535 tree, tree, tree, tree, tree, enum tree_code);
537 static inline tree
538 tree_vec_extract (block_stmt_iterator *bsi, tree type,
539 tree t, tree bitsize, tree bitpos)
541 if (bitpos)
542 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
543 else
544 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
547 static tree
548 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
549 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
550 enum tree_code code)
552 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
553 return gimplify_build1 (bsi, code, inner_type, a);
556 static tree
557 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
558 tree bitpos, tree bitsize, enum tree_code code)
560 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
561 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
562 return gimplify_build2 (bsi, code, inner_type, a, b);
565 /* Expand vector addition to scalars. This does bit twiddling
566 in order to increase parallelism:
568 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
569 (a ^ b) & 0x80808080
571 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
572 (a ^ ~b) & 0x80808080
574 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
576 This optimization should be done only if 4 vector items or more
577 fit into a word. */
578 static tree
579 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
580 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
581 enum tree_code code)
583 tree inner_type = TREE_TYPE (TREE_TYPE (a));
584 unsigned HOST_WIDE_INT max;
585 tree low_bits, high_bits, a_low, b_low, result_low, signs;
587 max = GET_MODE_MASK (TYPE_MODE (inner_type));
588 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
589 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
591 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
592 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
594 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
595 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
596 if (code == PLUS_EXPR)
597 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
598 else
600 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
601 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
604 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
605 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
606 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
609 static tree
610 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
611 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
612 tree bitsize ATTRIBUTE_UNUSED,
613 enum tree_code code ATTRIBUTE_UNUSED)
615 tree inner_type = TREE_TYPE (TREE_TYPE (b));
616 HOST_WIDE_INT max;
617 tree low_bits, high_bits, b_low, result_low, signs;
619 max = GET_MODE_MASK (TYPE_MODE (inner_type));
620 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
621 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
623 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
625 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
626 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
627 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
628 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
629 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
632 /* Expand a vector operation to scalars, by using many operations
633 whose type is the vector type's inner type. */
634 static tree
635 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
636 tree type, tree inner_type,
637 tree a, tree b, enum tree_code code)
639 tree head, *chain = &head;
640 tree part_width = TYPE_SIZE (inner_type);
641 tree index = bitsize_int (0);
642 int nunits = TYPE_VECTOR_SUBPARTS (type);
643 int delta = tree_low_cst (part_width, 1)
644 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
645 int i;
647 for (i = 0; i < nunits;
648 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
650 tree result = f (bsi, inner_type, a, b, index, part_width, code);
651 *chain = tree_cons (NULL_TREE, result, NULL_TREE);
652 chain = &TREE_CHAIN (*chain);
655 return build1 (CONSTRUCTOR, type, head);
658 /* Expand a vector operation to scalars with the freedom to use
659 a scalar integer type, or to use a different size for the items
660 in the vector type. */
661 static tree
662 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
663 tree a, tree b,
664 enum tree_code code)
666 tree result, compute_type;
667 enum machine_mode mode;
668 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
670 /* We have three strategies. If the type is already correct, just do
671 the operation an element at a time. Else, if the vector is wider than
672 one word, do it a word at a time; finally, if the vector is smaller
673 than one word, do it as a scalar. */
674 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
675 return expand_vector_piecewise (bsi, f,
676 type, TREE_TYPE (type),
677 a, b, code);
678 else if (n_words > 1)
680 tree word_type = build_word_mode_vector_type (n_words);
681 result = expand_vector_piecewise (bsi, f,
682 word_type, TREE_TYPE (word_type),
683 a, b, code);
684 result = gimplify_val (bsi, word_type, result);
686 else
688 /* Use a single scalar operation with a mode no wider than word_mode. */
689 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
690 compute_type = lang_hooks.types.type_for_mode (mode, 1);
691 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
694 return build1 (VIEW_CONVERT_EXPR, type, result);
697 /* Expand a vector operation to scalars; for integer types we can use
698 special bit twiddling tricks to do the sums a word at a time, using
699 function F_PARALLEL instead of F. These tricks are done only if
700 they can process at least four items, that is, only if the vector
701 holds at least four items and if a word can hold four items. */
702 static tree
703 expand_vector_addition (block_stmt_iterator *bsi,
704 elem_op_func f, elem_op_func f_parallel,
705 tree type, tree a, tree b, enum tree_code code)
707 int parts_per_word = UNITS_PER_WORD
708 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
710 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
711 && parts_per_word >= 4
712 && TYPE_VECTOR_SUBPARTS (type) >= 4)
713 return expand_vector_parallel (bsi, f_parallel,
714 type, a, b, code);
715 else
716 return expand_vector_piecewise (bsi, f,
717 type, TREE_TYPE (type),
718 a, b, code);
721 /* Return a type for the widest vector mode whose components are of mode
722 INNER_MODE, or NULL_TREE if none is found. */
723 static tree
724 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
726 enum machine_mode best_mode = VOIDmode, mode;
727 int best_nunits = 0;
729 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
730 mode = MIN_MODE_VECTOR_FLOAT;
731 else
732 mode = MIN_MODE_VECTOR_INT;
734 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
735 if (GET_MODE_INNER (mode) == inner_mode
736 && GET_MODE_NUNITS (mode) > best_nunits
737 && op->handlers[mode].insn_code != CODE_FOR_nothing)
738 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
740 if (best_mode == VOIDmode)
741 return NULL_TREE;
742 else
743 return lang_hooks.types.type_for_mode (best_mode, 1);
746 /* Process one statement. If we identify a vector operation, expand it. */
748 static void
749 expand_vector_operations_1 (block_stmt_iterator *bsi)
751 tree stmt = bsi_stmt (*bsi);
752 tree *p_rhs, rhs, type, compute_type;
753 enum tree_code code;
754 enum machine_mode compute_mode;
755 optab op;
757 switch (TREE_CODE (stmt))
759 case RETURN_EXPR:
760 stmt = TREE_OPERAND (stmt, 0);
761 if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
762 return;
764 /* FALLTHRU */
766 case MODIFY_EXPR:
767 p_rhs = &TREE_OPERAND (stmt, 1);
768 rhs = *p_rhs;
769 break;
771 default:
772 return;
775 type = TREE_TYPE (rhs);
776 if (TREE_CODE (type) != VECTOR_TYPE)
777 return;
779 code = TREE_CODE (rhs);
780 if (TREE_CODE_CLASS (code) != '1'
781 && TREE_CODE_CLASS (code) != '2')
782 return;
784 if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
785 return;
787 if (code == CONVERT_EXPR)
788 abort ();
790 op = optab_for_tree_code (code, type);
792 /* Optabs will try converting a negation into a subtraction, so
793 look for it as well. TODO: negation of floating-point vectors
794 might be turned into an exclusive OR toggling the sign bit. */
795 if (op == NULL
796 && code == NEGATE_EXPR
797 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
798 op = optab_for_tree_code (MINUS_EXPR, type);
800 /* For very wide vectors, try using a smaller vector mode. */
801 compute_type = type;
802 if (TYPE_MODE (type) == BLKmode && op)
804 tree vector_compute_type
805 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
806 if (vector_compute_type != NULL_TREE)
807 compute_type = vector_compute_type;
810 compute_mode = TYPE_MODE (compute_type);
812 /* If we are breaking a BLKmode vector into smaller pieces,
813 type_for_widest_vector_mode has already looked into the optab,
814 so skip these checks. */
815 if (compute_type == type)
817 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
818 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
819 && op != NULL
820 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
821 return;
822 else
824 /* There is no operation in hardware, so fall back to scalars. */
825 compute_type = TREE_TYPE (type);
826 compute_mode = TYPE_MODE (compute_type);
830 /* If the compute mode is not a vector mode (hence we are decomposing
831 a BLKmode vector to smaller, hardware-supported vectors), we may
832 want to expand the operations in parallel. */
833 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
834 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
835 switch (code)
837 case PLUS_EXPR:
838 case MINUS_EXPR:
839 if (TYPE_TRAP_SIGNED (type))
840 break;
842 *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
843 TREE_OPERAND (rhs, 0),
844 TREE_OPERAND (rhs, 1), code);
845 modify_stmt (bsi_stmt (*bsi));
846 return;
848 case NEGATE_EXPR:
849 if (TYPE_TRAP_SIGNED (type))
850 break;
852 *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
853 TREE_OPERAND (rhs, 0),
854 NULL_TREE, code);
855 modify_stmt (bsi_stmt (*bsi));
856 return;
858 case BIT_AND_EXPR:
859 case BIT_IOR_EXPR:
860 case BIT_XOR_EXPR:
861 *p_rhs = expand_vector_parallel (bsi, do_binop, type,
862 TREE_OPERAND (rhs, 0),
863 TREE_OPERAND (rhs, 1), code);
864 modify_stmt (bsi_stmt (*bsi));
865 return;
867 case BIT_NOT_EXPR:
868 *p_rhs = expand_vector_parallel (bsi, do_unop, type,
869 TREE_OPERAND (rhs, 0),
870 NULL_TREE, code);
871 modify_stmt (bsi_stmt (*bsi));
872 return;
874 default:
875 break;
878 if (TREE_CODE_CLASS (code) == '1')
879 *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
880 TREE_OPERAND (rhs, 0),
881 NULL_TREE, code);
882 else
883 *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
884 TREE_OPERAND (rhs, 0),
885 TREE_OPERAND (rhs, 1), code);
887 modify_stmt (bsi_stmt (*bsi));
890 static void
891 expand_vector_operations (void)
893 block_stmt_iterator bsi;
894 basic_block bb;
896 FOR_EACH_BB (bb)
898 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
899 expand_vector_operations_1 (&bsi);
903 static void
904 tree_lower_operations (void)
906 int old_last_basic_block = last_basic_block;
907 block_stmt_iterator bsi;
908 basic_block bb;
910 FOR_EACH_BB (bb)
912 if (bb->index >= old_last_basic_block)
913 continue;
914 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
916 expand_complex_operations_1 (&bsi);
917 expand_vector_operations_1 (&bsi);
923 struct tree_opt_pass pass_lower_vector_ssa =
925 "vector", /* name */
926 NULL, /* gate */
927 expand_vector_operations, /* execute */
928 NULL, /* sub */
929 NULL, /* next */
930 0, /* static_pass_number */
931 0, /* tv_id */
932 PROP_cfg, /* properties_required */
933 0, /* properties_provided */
934 0, /* properties_destroyed */
935 0, /* todo_flags_start */
936 TODO_dump_func | TODO_rename_vars /* todo_flags_finish */
937 | TODO_ggc_collect | TODO_verify_ssa
938 | TODO_verify_stmts | TODO_verify_flow
941 struct tree_opt_pass pass_pre_expand =
943 "oplower", /* name */
944 0, /* gate */
945 tree_lower_operations, /* execute */
946 NULL, /* sub */
947 NULL, /* next */
948 0, /* static_pass_number */
949 0, /* tv_id */
950 PROP_cfg, /* properties_required */
951 0, /* properties_provided */
952 0, /* properties_destroyed */
953 0, /* todo_flags_start */
954 TODO_dump_func | TODO_ggc_collect
955 | TODO_verify_stmts /* todo_flags_finish */