PR tree-optimization/17468
[official-gcc.git] / gcc / tree-complex.c
blob363e3a2d578e6137d11b77b6081d916147b5d021
1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
40 /* Extract the real or imaginary part of a complex variable or constant.
41 Make sure that it's a proper gimple_val and gimplify it if not.
42 Emit any new code before BSI. */
44 static tree
45 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
47 tree ret, inner_type;
49 inner_type = TREE_TYPE (TREE_TYPE (t));
50 switch (TREE_CODE (t))
52 case COMPLEX_CST:
53 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
54 break;
56 case COMPLEX_EXPR:
57 ret = TREE_OPERAND (t, imagpart_p);
58 break;
60 case VAR_DECL:
61 case PARM_DECL:
62 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
63 inner_type, t);
64 break;
66 default:
67 gcc_unreachable ();
70 return gimplify_val (bsi, inner_type, ret);
73 /* Update an assignment to a complex variable in place. */
75 static void
76 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
78 tree stmt = bsi_stmt (*bsi);
79 tree type;
81 if (TREE_CODE (stmt) == RETURN_EXPR)
82 stmt = TREE_OPERAND (stmt, 0);
84 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
85 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
86 modify_stmt (stmt);
89 /* Expand complex addition to scalars:
90 a + b = (ar + br) + i(ai + bi)
91 a - b = (ar - br) + i(ai + bi)
94 static void
95 expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
96 tree ar, tree ai, tree br, tree bi,
97 enum tree_code code)
99 tree rr, ri;
101 rr = gimplify_build2 (bsi, code, inner_type, ar, br);
102 ri = gimplify_build2 (bsi, code, inner_type, ai, bi);
104 update_complex_assignment (bsi, rr, ri);
107 /* Expand complex multiplication to scalars:
108 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
111 static void
112 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
113 tree ar, tree ai, tree br, tree bi)
115 tree t1, t2, t3, t4, rr, ri;
117 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
118 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
119 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
121 /* Avoid expanding redundant multiplication for the common
122 case of squaring a complex number. */
123 if (ar == br && ai == bi)
124 t4 = t3;
125 else
126 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
128 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
129 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
131 update_complex_assignment (bsi, rr, ri);
134 /* Expand complex division to scalars, straightforward algorithm.
135 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
136 t = br*br + bi*bi
139 static void
140 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
141 tree ar, tree ai, tree br, tree bi,
142 enum tree_code code)
144 tree rr, ri, div, t1, t2, t3;
146 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
147 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
148 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
150 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
151 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
152 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
153 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
155 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
156 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
157 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
158 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
160 update_complex_assignment (bsi, rr, ri);
163 /* Expand complex division to scalars, modified algorithm to minimize
164 overflow with wide input ranges. */
166 static void
167 expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
168 tree ar, tree ai, tree br, tree bi,
169 enum tree_code code)
171 tree rr, ri, ratio, div, t1, t2, min, max, cond;
173 /* Examine |br| < |bi|, and branch. */
174 t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
175 t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
176 cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
177 STRIP_NOPS (cond);
179 if (TREE_CONSTANT (cond))
181 if (integer_zerop (cond))
182 min = bi, max = br;
183 else
184 min = br, max = bi;
186 else
188 basic_block bb_cond, bb_true, bb_false, bb_join;
189 tree l1, l2, l3;
190 edge e;
192 l1 = create_artificial_label ();
193 t1 = build (GOTO_EXPR, void_type_node, l1);
194 l2 = create_artificial_label ();
195 t2 = build (GOTO_EXPR, void_type_node, l2);
196 cond = build (COND_EXPR, void_type_node, cond, t1, t2);
197 bsi_insert_before (bsi, cond, BSI_SAME_STMT);
199 min = make_rename_temp (inner_type, NULL);
200 max = make_rename_temp (inner_type, NULL);
201 l3 = create_artificial_label ();
203 /* Split the original block, and create the TRUE and FALSE blocks. */
204 e = split_block (bsi->bb, cond);
205 bb_cond = e->src;
206 bb_join = e->dest;
207 bb_true = create_empty_bb (bb_cond);
208 bb_false = create_empty_bb (bb_true);
210 /* Wire the blocks together. */
211 e->flags = EDGE_TRUE_VALUE;
212 redirect_edge_succ (e, bb_true);
213 make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
214 make_edge (bb_true, bb_join, 0);
215 make_edge (bb_false, bb_join, 0);
217 /* Update dominance info. Note that bb_join's data was
218 updated by split_block. */
219 if (dom_computed[CDI_DOMINATORS] >= DOM_CONS_OK)
221 set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
222 set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
225 /* Compute min and max for TRUE block. */
226 *bsi = bsi_start (bb_true);
227 t1 = build (LABEL_EXPR, void_type_node, l1);
228 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
229 t1 = build (MODIFY_EXPR, inner_type, min, br);
230 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
231 t1 = build (MODIFY_EXPR, inner_type, max, bi);
232 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
234 /* Compute min and max for FALSE block. */
235 *bsi = bsi_start (bb_false);
236 t1 = build (LABEL_EXPR, void_type_node, l2);
237 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
238 t1 = build (MODIFY_EXPR, inner_type, min, bi);
239 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
240 t1 = build (MODIFY_EXPR, inner_type, max, br);
241 bsi_insert_after (bsi, t1, BSI_NEW_STMT);
243 /* Insert the join label into the tail of the original block. */
244 *bsi = bsi_start (bb_join);
245 t1 = build (LABEL_EXPR, void_type_node, l3);
246 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
249 /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|). We now use the
250 ratio min/max to scale both the dividend and divisor. */
251 ratio = gimplify_build2 (bsi, code, inner_type, min, max);
253 /* Calculate the divisor: min*ratio + max. */
254 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, min, ratio);
255 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, max);
257 /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div). */
258 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
259 t2 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, ar, t1);
260 rr = gimplify_build2 (bsi, code, inner_type, t2, div);
262 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
263 t2 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
264 ri = gimplify_build2 (bsi, code, inner_type, t2, div);
266 update_complex_assignment (bsi, rr, ri);
269 /* Expand complex division to scalars. */
271 static void
272 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
273 tree ar, tree ai, tree br, tree bi,
274 enum tree_code code)
276 switch (flag_complex_divide_method)
278 case 0:
279 /* straightforward implementation of complex divide acceptable. */
280 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
281 break;
282 case 1:
283 /* wide ranges of inputs must work for complex divide. */
284 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
285 break;
286 default:
287 /* C99-like requirements for complex divide (not yet implemented). */
288 gcc_unreachable ();
292 /* Expand complex negation to scalars:
293 -a = (-ar) + i(-ai)
296 static void
297 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
298 tree ar, tree ai)
300 tree rr, ri;
302 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
303 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
305 update_complex_assignment (bsi, rr, ri);
308 /* Expand complex conjugate to scalars:
309 ~a = (ar) + i(-ai)
312 static void
313 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
314 tree ar, tree ai)
316 tree ri;
318 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
320 update_complex_assignment (bsi, ar, ri);
323 /* Expand complex comparison (EQ or NE only). */
325 static void
326 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
327 tree br, tree bi, enum tree_code code)
329 tree cr, ci, cc, stmt, expr, type;
331 cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
332 ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
333 cc = gimplify_build2 (bsi,
334 (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
335 boolean_type_node, cr, ci);
337 stmt = expr = bsi_stmt (*bsi);
339 switch (TREE_CODE (stmt))
341 case RETURN_EXPR:
342 expr = TREE_OPERAND (stmt, 0);
343 /* FALLTHRU */
344 case MODIFY_EXPR:
345 type = TREE_TYPE (TREE_OPERAND (expr, 1));
346 TREE_OPERAND (expr, 1) = fold_convert (type, cc);
347 break;
348 case COND_EXPR:
349 TREE_OPERAND (stmt, 0) = cc;
350 break;
351 default:
352 gcc_unreachable ();
355 modify_stmt (stmt);
358 /* Process one statement. If we identify a complex operation, expand it. */
360 static void
361 expand_complex_operations_1 (block_stmt_iterator *bsi)
363 tree stmt = bsi_stmt (*bsi);
364 tree rhs, type, inner_type;
365 tree ac, ar, ai, bc, br, bi;
366 enum tree_code code;
368 switch (TREE_CODE (stmt))
370 case RETURN_EXPR:
371 stmt = TREE_OPERAND (stmt, 0);
372 if (!stmt)
373 return;
374 if (TREE_CODE (stmt) != MODIFY_EXPR)
375 return;
376 /* FALLTHRU */
378 case MODIFY_EXPR:
379 rhs = TREE_OPERAND (stmt, 1);
380 break;
382 case COND_EXPR:
383 rhs = TREE_OPERAND (stmt, 0);
384 break;
386 default:
387 return;
390 type = TREE_TYPE (rhs);
391 code = TREE_CODE (rhs);
393 /* Initial filter for operations we handle. */
394 switch (code)
396 case PLUS_EXPR:
397 case MINUS_EXPR:
398 case MULT_EXPR:
399 case TRUNC_DIV_EXPR:
400 case CEIL_DIV_EXPR:
401 case FLOOR_DIV_EXPR:
402 case ROUND_DIV_EXPR:
403 case RDIV_EXPR:
404 case NEGATE_EXPR:
405 case CONJ_EXPR:
406 if (TREE_CODE (type) != COMPLEX_TYPE)
407 return;
408 inner_type = TREE_TYPE (type);
409 break;
411 case EQ_EXPR:
412 case NE_EXPR:
413 inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
414 if (TREE_CODE (inner_type) != COMPLEX_TYPE)
415 return;
416 break;
418 default:
419 return;
422 /* Extract the components of the two complex values. Make sure and
423 handle the common case of the same value used twice specially. */
424 ac = TREE_OPERAND (rhs, 0);
425 ar = extract_component (bsi, ac, 0);
426 ai = extract_component (bsi, ac, 1);
428 if (TREE_CODE_CLASS (code) == '1')
429 bc = br = bi = NULL;
430 else
432 bc = TREE_OPERAND (rhs, 1);
433 if (ac == bc)
434 br = ar, bi = ai;
435 else
437 br = extract_component (bsi, bc, 0);
438 bi = extract_component (bsi, bc, 1);
442 switch (code)
444 case PLUS_EXPR:
445 case MINUS_EXPR:
446 expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
447 break;
449 case MULT_EXPR:
450 expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
451 break;
453 case TRUNC_DIV_EXPR:
454 case CEIL_DIV_EXPR:
455 case FLOOR_DIV_EXPR:
456 case ROUND_DIV_EXPR:
457 case RDIV_EXPR:
458 expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
459 break;
461 case NEGATE_EXPR:
462 expand_complex_negation (bsi, inner_type, ar, ai);
463 break;
465 case CONJ_EXPR:
466 expand_complex_conjugate (bsi, inner_type, ar, ai);
467 break;
469 case EQ_EXPR:
470 case NE_EXPR:
471 expand_complex_comparison (bsi, ar, ai, br, bi, code);
472 break;
474 default:
475 gcc_unreachable ();
479 /* Build a constant of type TYPE, made of VALUE's bits replicated
480 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
481 static tree
482 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
484 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
485 int n = HOST_BITS_PER_WIDE_INT / width;
486 unsigned HOST_WIDE_INT low, high, mask;
487 tree ret;
489 gcc_assert (n);
491 if (width == HOST_BITS_PER_WIDE_INT)
492 low = value;
493 else
495 mask = ((HOST_WIDE_INT)1 << width) - 1;
496 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
499 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
500 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
501 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
502 high = 0;
503 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
504 high = low;
505 else
506 gcc_unreachable ();
508 ret = build_int_cst_wide (type, low, high);
509 return ret;
512 /* Return a suitable vector types made of SUBPARTS units each of mode
513 "word_mode" (the global variable). */
514 static tree
515 build_word_mode_vector_type (int nunits)
517 static tree innertype;
518 static tree last;
519 static int last_nunits;
521 if (!innertype)
522 innertype = lang_hooks.types.type_for_mode (word_mode, 1);
523 else if (last_nunits == nunits)
524 return last;
526 /* We build a new type, but we canonicalize it nevertheless,
527 because it still saves some memory. */
528 last_nunits = nunits;
529 last = type_hash_canon (nunits, build_vector_type (innertype, nunits));
530 return last;
533 typedef tree (*elem_op_func) (block_stmt_iterator *,
534 tree, tree, tree, tree, tree, enum tree_code);
536 static inline tree
537 tree_vec_extract (block_stmt_iterator *bsi, tree type,
538 tree t, tree bitsize, tree bitpos)
540 if (bitpos)
541 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
542 else
543 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
546 static tree
547 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
548 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
549 enum tree_code code)
551 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
552 return gimplify_build1 (bsi, code, inner_type, a);
555 static tree
556 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
557 tree bitpos, tree bitsize, enum tree_code code)
559 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
560 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
561 return gimplify_build2 (bsi, code, inner_type, a, b);
564 /* Expand vector addition to scalars. This does bit twiddling
565 in order to increase parallelism:
567 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
568 (a ^ b) & 0x80808080
570 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
571 (a ^ ~b) & 0x80808080
573 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
575 This optimization should be done only if 4 vector items or more
576 fit into a word. */
577 static tree
578 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
579 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
580 enum tree_code code)
582 tree inner_type = TREE_TYPE (TREE_TYPE (a));
583 unsigned HOST_WIDE_INT max;
584 tree low_bits, high_bits, a_low, b_low, result_low, signs;
586 max = GET_MODE_MASK (TYPE_MODE (inner_type));
587 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
588 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
590 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
591 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
593 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
594 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
595 if (code == PLUS_EXPR)
596 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
597 else
599 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
600 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
603 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
604 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
605 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
608 static tree
609 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
610 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
611 tree bitsize ATTRIBUTE_UNUSED,
612 enum tree_code code ATTRIBUTE_UNUSED)
614 tree inner_type = TREE_TYPE (TREE_TYPE (b));
615 HOST_WIDE_INT max;
616 tree low_bits, high_bits, b_low, result_low, signs;
618 max = GET_MODE_MASK (TYPE_MODE (inner_type));
619 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
620 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
622 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
624 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
625 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
626 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
627 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
628 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
631 /* Expand a vector operation to scalars, by using many operations
632 whose type is the vector type's inner type. */
633 static tree
634 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
635 tree type, tree inner_type,
636 tree a, tree b, enum tree_code code)
638 tree head, *chain = &head;
639 tree part_width = TYPE_SIZE (inner_type);
640 tree index = bitsize_int (0);
641 int nunits = TYPE_VECTOR_SUBPARTS (type);
642 int delta = tree_low_cst (part_width, 1)
643 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
644 int i;
646 for (i = 0; i < nunits;
647 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
649 tree result = f (bsi, inner_type, a, b, index, part_width, code);
650 *chain = tree_cons (NULL_TREE, result, NULL_TREE);
651 chain = &TREE_CHAIN (*chain);
654 return build1 (CONSTRUCTOR, type, head);
657 /* Expand a vector operation to scalars with the freedom to use
658 a scalar integer type, or to use a different size for the items
659 in the vector type. */
660 static tree
661 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
662 tree a, tree b,
663 enum tree_code code)
665 tree result, compute_type;
666 enum machine_mode mode;
667 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
669 /* We have three strategies. If the type is already correct, just do
670 the operation an element at a time. Else, if the vector is wider than
671 one word, do it a word at a time; finally, if the vector is smaller
672 than one word, do it as a scalar. */
673 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
674 return expand_vector_piecewise (bsi, f,
675 type, TREE_TYPE (type),
676 a, b, code);
677 else if (n_words > 1)
679 tree word_type = build_word_mode_vector_type (n_words);
680 result = expand_vector_piecewise (bsi, f,
681 word_type, TREE_TYPE (word_type),
682 a, b, code);
683 result = gimplify_val (bsi, word_type, result);
685 else
687 /* Use a single scalar operation with a mode no wider than word_mode. */
688 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
689 compute_type = lang_hooks.types.type_for_mode (mode, 1);
690 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
693 return build1 (VIEW_CONVERT_EXPR, type, result);
696 /* Expand a vector operation to scalars; for integer types we can use
697 special bit twiddling tricks to do the sums a word at a time, using
698 function F_PARALLEL instead of F. These tricks are done only if
699 they can process at least four items, that is, only if the vector
700 holds at least four items and if a word can hold four items. */
701 static tree
702 expand_vector_addition (block_stmt_iterator *bsi,
703 elem_op_func f, elem_op_func f_parallel,
704 tree type, tree a, tree b, enum tree_code code)
706 int parts_per_word = UNITS_PER_WORD
707 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
709 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
710 && parts_per_word >= 4
711 && TYPE_VECTOR_SUBPARTS (type) >= 4)
712 return expand_vector_parallel (bsi, f_parallel,
713 type, a, b, code);
714 else
715 return expand_vector_piecewise (bsi, f,
716 type, TREE_TYPE (type),
717 a, b, code);
720 /* Return a type for the widest vector mode whose components are of mode
721 INNER_MODE, or NULL_TREE if none is found. */
722 static tree
723 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
725 enum machine_mode best_mode = VOIDmode, mode;
726 int best_nunits = 0;
728 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
729 mode = MIN_MODE_VECTOR_FLOAT;
730 else
731 mode = MIN_MODE_VECTOR_INT;
733 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
734 if (GET_MODE_INNER (mode) == inner_mode
735 && GET_MODE_NUNITS (mode) > best_nunits
736 && op->handlers[mode].insn_code != CODE_FOR_nothing)
737 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
739 if (best_mode == VOIDmode)
740 return NULL_TREE;
741 else
742 return lang_hooks.types.type_for_mode (best_mode, 1);
745 /* Process one statement. If we identify a vector operation, expand it. */
747 static void
748 expand_vector_operations_1 (block_stmt_iterator *bsi)
750 tree stmt = bsi_stmt (*bsi);
751 tree *p_rhs, rhs, type, compute_type;
752 enum tree_code code;
753 enum machine_mode compute_mode;
754 optab op;
756 switch (TREE_CODE (stmt))
758 case RETURN_EXPR:
759 stmt = TREE_OPERAND (stmt, 0);
760 if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
761 return;
763 /* FALLTHRU */
765 case MODIFY_EXPR:
766 p_rhs = &TREE_OPERAND (stmt, 1);
767 rhs = *p_rhs;
768 break;
770 default:
771 return;
774 type = TREE_TYPE (rhs);
775 if (TREE_CODE (type) != VECTOR_TYPE)
776 return;
778 code = TREE_CODE (rhs);
779 if (TREE_CODE_CLASS (code) != '1'
780 && TREE_CODE_CLASS (code) != '2')
781 return;
783 if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
784 return;
786 gcc_assert (code != CONVERT_EXPR);
787 op = optab_for_tree_code (code, type);
789 /* Optabs will try converting a negation into a subtraction, so
790 look for it as well. TODO: negation of floating-point vectors
791 might be turned into an exclusive OR toggling the sign bit. */
792 if (op == NULL
793 && code == NEGATE_EXPR
794 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
795 op = optab_for_tree_code (MINUS_EXPR, type);
797 /* For very wide vectors, try using a smaller vector mode. */
798 compute_type = type;
799 if (TYPE_MODE (type) == BLKmode && op)
801 tree vector_compute_type
802 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
803 if (vector_compute_type != NULL_TREE)
804 compute_type = vector_compute_type;
807 compute_mode = TYPE_MODE (compute_type);
809 /* If we are breaking a BLKmode vector into smaller pieces,
810 type_for_widest_vector_mode has already looked into the optab,
811 so skip these checks. */
812 if (compute_type == type)
814 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
815 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
816 && op != NULL
817 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
818 return;
819 else
821 /* There is no operation in hardware, so fall back to scalars. */
822 compute_type = TREE_TYPE (type);
823 compute_mode = TYPE_MODE (compute_type);
827 /* If the compute mode is not a vector mode (hence we are decomposing
828 a BLKmode vector to smaller, hardware-supported vectors), we may
829 want to expand the operations in parallel. */
830 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
831 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
832 switch (code)
834 case PLUS_EXPR:
835 case MINUS_EXPR:
836 if (TYPE_TRAP_SIGNED (type))
837 break;
839 *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
840 TREE_OPERAND (rhs, 0),
841 TREE_OPERAND (rhs, 1), code);
842 modify_stmt (bsi_stmt (*bsi));
843 return;
845 case NEGATE_EXPR:
846 if (TYPE_TRAP_SIGNED (type))
847 break;
849 *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
850 TREE_OPERAND (rhs, 0),
851 NULL_TREE, code);
852 modify_stmt (bsi_stmt (*bsi));
853 return;
855 case BIT_AND_EXPR:
856 case BIT_IOR_EXPR:
857 case BIT_XOR_EXPR:
858 *p_rhs = expand_vector_parallel (bsi, do_binop, type,
859 TREE_OPERAND (rhs, 0),
860 TREE_OPERAND (rhs, 1), code);
861 modify_stmt (bsi_stmt (*bsi));
862 return;
864 case BIT_NOT_EXPR:
865 *p_rhs = expand_vector_parallel (bsi, do_unop, type,
866 TREE_OPERAND (rhs, 0),
867 NULL_TREE, code);
868 modify_stmt (bsi_stmt (*bsi));
869 return;
871 default:
872 break;
875 if (TREE_CODE_CLASS (code) == '1')
876 *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
877 TREE_OPERAND (rhs, 0),
878 NULL_TREE, code);
879 else
880 *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
881 TREE_OPERAND (rhs, 0),
882 TREE_OPERAND (rhs, 1), code);
884 modify_stmt (bsi_stmt (*bsi));
887 static void
888 expand_vector_operations (void)
890 block_stmt_iterator bsi;
891 basic_block bb;
893 FOR_EACH_BB (bb)
895 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
896 expand_vector_operations_1 (&bsi);
900 static void
901 tree_lower_operations (void)
903 int old_last_basic_block = last_basic_block;
904 block_stmt_iterator bsi;
905 basic_block bb;
907 FOR_EACH_BB (bb)
909 if (bb->index >= old_last_basic_block)
910 continue;
911 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
913 expand_complex_operations_1 (&bsi);
914 expand_vector_operations_1 (&bsi);
920 struct tree_opt_pass pass_lower_vector_ssa =
922 "vector", /* name */
923 NULL, /* gate */
924 expand_vector_operations, /* execute */
925 NULL, /* sub */
926 NULL, /* next */
927 0, /* static_pass_number */
928 0, /* tv_id */
929 PROP_cfg, /* properties_required */
930 0, /* properties_provided */
931 0, /* properties_destroyed */
932 0, /* todo_flags_start */
933 TODO_dump_func | TODO_rename_vars /* todo_flags_finish */
934 | TODO_ggc_collect | TODO_verify_ssa
935 | TODO_verify_stmts | TODO_verify_flow,
936 0 /* letter */
939 struct tree_opt_pass pass_pre_expand =
941 "oplower", /* name */
942 0, /* gate */
943 tree_lower_operations, /* execute */
944 NULL, /* sub */
945 NULL, /* next */
946 0, /* static_pass_number */
947 0, /* tv_id */
948 PROP_cfg, /* properties_required */
949 0, /* properties_provided */
950 0, /* properties_destroyed */
951 0, /* todo_flags_start */
952 TODO_dump_func | TODO_ggc_collect
953 | TODO_verify_stmts, /* todo_flags_finish */
954 0 /* letter */