tree-inline.c (estimate_num_insns_1): Handle VEC_COND_EXPR.
[official-gcc.git] / gcc / tree-complex.c
blob97951d0be958d07a450084132a102412b58204ee
1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
38 #include "ggc.h"
41 /* Extract the real or imaginary part of a complex variable or constant.
42 Make sure that it's a proper gimple_val and gimplify it if not.
43 Emit any new code before BSI. */
45 static tree
46 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
48 tree ret, inner_type;
50 inner_type = TREE_TYPE (TREE_TYPE (t));
51 switch (TREE_CODE (t))
53 case COMPLEX_CST:
54 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
55 break;
57 case COMPLEX_EXPR:
58 ret = TREE_OPERAND (t, imagpart_p);
59 break;
61 case VAR_DECL:
62 case PARM_DECL:
63 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
64 inner_type, t);
65 break;
67 default:
68 gcc_unreachable ();
71 return gimplify_val (bsi, inner_type, ret);
74 /* Update an assignment to a complex variable in place. */
76 static void
77 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
79 tree stmt = bsi_stmt (*bsi);
80 tree type;
82 if (TREE_CODE (stmt) == RETURN_EXPR)
83 stmt = TREE_OPERAND (stmt, 0);
85 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
86 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
87 mark_stmt_modified (stmt);
90 /* Expand complex addition to scalars:
91 a + b = (ar + br) + i(ai + bi)
92 a - b = (ar - br) + i(ai + bi)
95 static void
96 expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
97 tree ar, tree ai, tree br, tree bi,
98 enum tree_code code)
100 tree rr, ri;
102 rr = gimplify_build2 (bsi, code, inner_type, ar, br);
103 ri = gimplify_build2 (bsi, code, inner_type, ai, bi);
105 update_complex_assignment (bsi, rr, ri);
108 /* Expand a complex multiplication or division to a libcall to the c99
109 compliant routines. */
111 static void
112 expand_complex_libcall (block_stmt_iterator *bsi, tree ar, tree ai,
113 tree br, tree bi, enum tree_code code)
115 enum machine_mode mode;
116 enum built_in_function bcode;
117 tree args, fn, stmt, type;
119 args = tree_cons (NULL, bi, NULL);
120 args = tree_cons (NULL, br, args);
121 args = tree_cons (NULL, ai, args);
122 args = tree_cons (NULL, ar, args);
124 stmt = bsi_stmt (*bsi);
125 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
127 mode = TYPE_MODE (type);
128 gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
129 if (code == MULT_EXPR)
130 bcode = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
131 else if (code == RDIV_EXPR)
132 bcode = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
133 else
134 gcc_unreachable ();
135 fn = built_in_decls[bcode];
137 TREE_OPERAND (stmt, 1)
138 = build3 (CALL_EXPR, type, build_fold_addr_expr (fn), args, NULL);
139 update_stmt (stmt);
142 /* Expand complex multiplication to scalars:
143 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
146 static void
147 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
148 tree ar, tree ai, tree br, tree bi)
150 tree t1, t2, t3, t4, rr, ri;
152 if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type))
154 expand_complex_libcall (bsi, ar, ai, br, bi, MULT_EXPR);
155 return;
158 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
159 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
160 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
162 /* Avoid expanding redundant multiplication for the common
163 case of squaring a complex number. */
164 if (ar == br && ai == bi)
165 t4 = t3;
166 else
167 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
169 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
170 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
172 update_complex_assignment (bsi, rr, ri);
175 /* Expand complex division to scalars, straightforward algorithm.
176 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
177 t = br*br + bi*bi
180 static void
181 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
182 tree ar, tree ai, tree br, tree bi,
183 enum tree_code code)
185 tree rr, ri, div, t1, t2, t3;
187 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
188 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
189 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
191 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
192 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
193 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
194 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
196 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
197 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
198 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
199 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
201 update_complex_assignment (bsi, rr, ri);
204 /* Expand complex division to scalars, modified algorithm to minimize
205 overflow with wide input ranges. */
207 static void
208 expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
209 tree ar, tree ai, tree br, tree bi,
210 enum tree_code code)
212 tree rr, ri, ratio, div, t1, t2, tr, ti, cond;
213 basic_block bb_cond, bb_true, bb_false, bb_join;
215 /* Examine |br| < |bi|, and branch. */
216 t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
217 t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
218 cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
219 STRIP_NOPS (cond);
221 bb_cond = bb_true = bb_false = bb_join = NULL;
222 rr = ri = tr = ti = NULL;
223 if (!TREE_CONSTANT (cond))
225 edge e;
227 cond = build (COND_EXPR, void_type_node, cond, NULL, NULL);
228 bsi_insert_before (bsi, cond, BSI_SAME_STMT);
230 /* Split the original block, and create the TRUE and FALSE blocks. */
231 e = split_block (bsi->bb, cond);
232 bb_cond = e->src;
233 bb_join = e->dest;
234 bb_true = create_empty_bb (bb_cond);
235 bb_false = create_empty_bb (bb_true);
237 t1 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_true));
238 t2 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_false));
239 COND_EXPR_THEN (cond) = t1;
240 COND_EXPR_ELSE (cond) = t2;
242 /* Wire the blocks together. */
243 e->flags = EDGE_TRUE_VALUE;
244 redirect_edge_succ (e, bb_true);
245 make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
246 make_edge (bb_true, bb_join, EDGE_FALLTHRU);
247 make_edge (bb_false, bb_join, EDGE_FALLTHRU);
249 /* Update dominance info. Note that bb_join's data was
250 updated by split_block. */
251 if (dom_info_available_p (CDI_DOMINATORS))
253 set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
254 set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
257 rr = make_rename_temp (inner_type, NULL);
258 ri = make_rename_temp (inner_type, NULL);
261 /* In the TRUE branch, we compute
262 ratio = br/bi;
263 div = (br * ratio) + bi;
264 tr = (ar * ratio) + ai;
265 ti = (ai * ratio) - ar;
266 tr = tr / div;
267 ti = ti / div; */
268 if (bb_true || integer_nonzerop (cond))
270 if (bb_true)
272 *bsi = bsi_last (bb_true);
273 bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
276 ratio = gimplify_build2 (bsi, code, inner_type, br, bi);
278 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, ratio);
279 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, bi);
281 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
282 tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ai);
284 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
285 ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, ar);
287 tr = gimplify_build2 (bsi, code, inner_type, tr, div);
288 ti = gimplify_build2 (bsi, code, inner_type, ti, div);
290 if (bb_true)
292 t1 = build (MODIFY_EXPR, inner_type, rr, tr);
293 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
294 t1 = build (MODIFY_EXPR, inner_type, ri, ti);
295 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
296 bsi_remove (bsi);
300 /* In the FALSE branch, we compute
301 ratio = d/c;
302 divisor = (d * ratio) + c;
303 tr = (b * ratio) + a;
304 ti = b - (a * ratio);
305 tr = tr / div;
306 ti = ti / div; */
307 if (bb_false || integer_zerop (cond))
309 if (bb_false)
311 *bsi = bsi_last (bb_false);
312 bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
315 ratio = gimplify_build2 (bsi, code, inner_type, bi, br);
317 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, ratio);
318 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, br);
320 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
321 tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ar);
323 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
324 ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
326 tr = gimplify_build2 (bsi, code, inner_type, tr, div);
327 ti = gimplify_build2 (bsi, code, inner_type, ti, div);
329 if (bb_false)
331 t1 = build (MODIFY_EXPR, inner_type, rr, tr);
332 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
333 t1 = build (MODIFY_EXPR, inner_type, ri, ti);
334 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
335 bsi_remove (bsi);
339 if (bb_join)
340 *bsi = bsi_start (bb_join);
341 else
342 rr = tr, ri = ti;
344 update_complex_assignment (bsi, rr, ri);
347 /* Expand complex division to scalars. */
349 static void
350 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
351 tree ar, tree ai, tree br, tree bi,
352 enum tree_code code)
354 switch (flag_complex_method)
356 case 0:
357 /* straightforward implementation of complex divide acceptable. */
358 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
359 break;
361 case 2:
362 if (SCALAR_FLOAT_TYPE_P (inner_type))
364 expand_complex_libcall (bsi, ar, ai, br, bi, code);
365 return;
367 /* FALLTHRU */
369 case 1:
370 /* wide ranges of inputs must work for complex divide. */
371 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
372 break;
374 default:
375 gcc_unreachable ();
379 /* Expand complex negation to scalars:
380 -a = (-ar) + i(-ai)
383 static void
384 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
385 tree ar, tree ai)
387 tree rr, ri;
389 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
390 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
392 update_complex_assignment (bsi, rr, ri);
395 /* Expand complex conjugate to scalars:
396 ~a = (ar) + i(-ai)
399 static void
400 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
401 tree ar, tree ai)
403 tree ri;
405 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
407 update_complex_assignment (bsi, ar, ri);
410 /* Expand complex comparison (EQ or NE only). */
412 static void
413 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
414 tree br, tree bi, enum tree_code code)
416 tree cr, ci, cc, stmt, expr, type;
418 cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
419 ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
420 cc = gimplify_build2 (bsi,
421 (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
422 boolean_type_node, cr, ci);
424 stmt = expr = bsi_stmt (*bsi);
426 switch (TREE_CODE (stmt))
428 case RETURN_EXPR:
429 expr = TREE_OPERAND (stmt, 0);
430 /* FALLTHRU */
431 case MODIFY_EXPR:
432 type = TREE_TYPE (TREE_OPERAND (expr, 1));
433 TREE_OPERAND (expr, 1) = fold_convert (type, cc);
434 break;
435 case COND_EXPR:
436 TREE_OPERAND (stmt, 0) = cc;
437 break;
438 default:
439 gcc_unreachable ();
442 mark_stmt_modified (stmt);
445 /* Process one statement. If we identify a complex operation, expand it. */
447 static void
448 expand_complex_operations_1 (block_stmt_iterator *bsi)
450 tree stmt = bsi_stmt (*bsi);
451 tree rhs, type, inner_type;
452 tree ac, ar, ai, bc, br, bi;
453 enum tree_code code;
455 switch (TREE_CODE (stmt))
457 case RETURN_EXPR:
458 stmt = TREE_OPERAND (stmt, 0);
459 if (!stmt)
460 return;
461 if (TREE_CODE (stmt) != MODIFY_EXPR)
462 return;
463 /* FALLTHRU */
465 case MODIFY_EXPR:
466 rhs = TREE_OPERAND (stmt, 1);
467 break;
469 case COND_EXPR:
470 rhs = TREE_OPERAND (stmt, 0);
471 break;
473 default:
474 return;
477 type = TREE_TYPE (rhs);
478 code = TREE_CODE (rhs);
480 /* Initial filter for operations we handle. */
481 switch (code)
483 case PLUS_EXPR:
484 case MINUS_EXPR:
485 case MULT_EXPR:
486 case TRUNC_DIV_EXPR:
487 case CEIL_DIV_EXPR:
488 case FLOOR_DIV_EXPR:
489 case ROUND_DIV_EXPR:
490 case RDIV_EXPR:
491 case NEGATE_EXPR:
492 case CONJ_EXPR:
493 if (TREE_CODE (type) != COMPLEX_TYPE)
494 return;
495 inner_type = TREE_TYPE (type);
496 break;
498 case EQ_EXPR:
499 case NE_EXPR:
500 inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
501 if (TREE_CODE (inner_type) != COMPLEX_TYPE)
502 return;
503 break;
505 default:
506 return;
509 /* Extract the components of the two complex values. Make sure and
510 handle the common case of the same value used twice specially. */
511 ac = TREE_OPERAND (rhs, 0);
512 ar = extract_component (bsi, ac, 0);
513 ai = extract_component (bsi, ac, 1);
515 if (TREE_CODE_CLASS (code) == tcc_unary)
516 bc = br = bi = NULL;
517 else
519 bc = TREE_OPERAND (rhs, 1);
520 if (ac == bc)
521 br = ar, bi = ai;
522 else
524 br = extract_component (bsi, bc, 0);
525 bi = extract_component (bsi, bc, 1);
529 switch (code)
531 case PLUS_EXPR:
532 case MINUS_EXPR:
533 expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
534 break;
536 case MULT_EXPR:
537 expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
538 break;
540 case TRUNC_DIV_EXPR:
541 case CEIL_DIV_EXPR:
542 case FLOOR_DIV_EXPR:
543 case ROUND_DIV_EXPR:
544 case RDIV_EXPR:
545 expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
546 break;
548 case NEGATE_EXPR:
549 expand_complex_negation (bsi, inner_type, ar, ai);
550 break;
552 case CONJ_EXPR:
553 expand_complex_conjugate (bsi, inner_type, ar, ai);
554 break;
556 case EQ_EXPR:
557 case NE_EXPR:
558 expand_complex_comparison (bsi, ar, ai, br, bi, code);
559 break;
561 default:
562 gcc_unreachable ();
564 update_stmt_if_modified (stmt);
567 /* Build a constant of type TYPE, made of VALUE's bits replicated
568 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
569 static tree
570 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
572 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
573 int n = HOST_BITS_PER_WIDE_INT / width;
574 unsigned HOST_WIDE_INT low, high, mask;
575 tree ret;
577 gcc_assert (n);
579 if (width == HOST_BITS_PER_WIDE_INT)
580 low = value;
581 else
583 mask = ((HOST_WIDE_INT)1 << width) - 1;
584 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
587 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
588 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
589 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
590 high = 0;
591 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
592 high = low;
593 else
594 gcc_unreachable ();
596 ret = build_int_cst_wide (type, low, high);
597 return ret;
600 static GTY(()) tree vector_inner_type;
601 static GTY(()) tree vector_last_type;
602 static GTY(()) int vector_last_nunits;
604 /* Return a suitable vector types made of SUBPARTS units each of mode
605 "word_mode" (the global variable). */
606 static tree
607 build_word_mode_vector_type (int nunits)
609 if (!vector_inner_type)
610 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
611 else if (vector_last_nunits == nunits)
613 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
614 return vector_last_type;
617 /* We build a new type, but we canonicalize it nevertheless,
618 because it still saves some memory. */
619 vector_last_nunits = nunits;
620 vector_last_type = type_hash_canon (nunits,
621 build_vector_type (vector_inner_type,
622 nunits));
623 return vector_last_type;
626 typedef tree (*elem_op_func) (block_stmt_iterator *,
627 tree, tree, tree, tree, tree, enum tree_code);
629 static inline tree
630 tree_vec_extract (block_stmt_iterator *bsi, tree type,
631 tree t, tree bitsize, tree bitpos)
633 if (bitpos)
634 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
636 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
637 anyway be stored in memory, so prefer NOP_EXPR. */
638 else if (TYPE_MODE (type) == BLKmode)
639 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
640 else
641 return gimplify_build1 (bsi, NOP_EXPR, type, t);
644 static tree
645 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
646 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
647 enum tree_code code)
649 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
650 return gimplify_build1 (bsi, code, inner_type, a);
653 static tree
654 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
655 tree bitpos, tree bitsize, enum tree_code code)
657 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
658 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
659 return gimplify_build2 (bsi, code, inner_type, a, b);
662 /* Expand vector addition to scalars. This does bit twiddling
663 in order to increase parallelism:
665 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
666 (a ^ b) & 0x80808080
668 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
669 (a ^ ~b) & 0x80808080
671 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
673 This optimization should be done only if 4 vector items or more
674 fit into a word. */
675 static tree
676 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
677 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
678 enum tree_code code)
680 tree inner_type = TREE_TYPE (TREE_TYPE (a));
681 unsigned HOST_WIDE_INT max;
682 tree low_bits, high_bits, a_low, b_low, result_low, signs;
684 max = GET_MODE_MASK (TYPE_MODE (inner_type));
685 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
686 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
688 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
689 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
691 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
692 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
693 if (code == PLUS_EXPR)
694 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
695 else
697 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
698 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
701 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
702 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
703 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
706 static tree
707 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
708 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
709 tree bitsize ATTRIBUTE_UNUSED,
710 enum tree_code code ATTRIBUTE_UNUSED)
712 tree inner_type = TREE_TYPE (TREE_TYPE (b));
713 HOST_WIDE_INT max;
714 tree low_bits, high_bits, b_low, result_low, signs;
716 max = GET_MODE_MASK (TYPE_MODE (inner_type));
717 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
718 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
720 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
722 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
723 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
724 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
725 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
726 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
729 /* Expand a vector operation to scalars, by using many operations
730 whose type is the vector type's inner type. */
731 static tree
732 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
733 tree type, tree inner_type,
734 tree a, tree b, enum tree_code code)
736 tree head, *chain = &head;
737 tree part_width = TYPE_SIZE (inner_type);
738 tree index = bitsize_int (0);
739 int nunits = TYPE_VECTOR_SUBPARTS (type);
740 int delta = tree_low_cst (part_width, 1)
741 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
742 int i;
744 for (i = 0; i < nunits;
745 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
747 tree result = f (bsi, inner_type, a, b, index, part_width, code);
748 *chain = tree_cons (NULL_TREE, result, NULL_TREE);
749 chain = &TREE_CHAIN (*chain);
752 return build1 (CONSTRUCTOR, type, head);
755 /* Expand a vector operation to scalars with the freedom to use
756 a scalar integer type, or to use a different size for the items
757 in the vector type. */
758 static tree
759 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
760 tree a, tree b,
761 enum tree_code code)
763 tree result, compute_type;
764 enum machine_mode mode;
765 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
767 /* We have three strategies. If the type is already correct, just do
768 the operation an element at a time. Else, if the vector is wider than
769 one word, do it a word at a time; finally, if the vector is smaller
770 than one word, do it as a scalar. */
771 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
772 return expand_vector_piecewise (bsi, f,
773 type, TREE_TYPE (type),
774 a, b, code);
775 else if (n_words > 1)
777 tree word_type = build_word_mode_vector_type (n_words);
778 result = expand_vector_piecewise (bsi, f,
779 word_type, TREE_TYPE (word_type),
780 a, b, code);
781 result = gimplify_val (bsi, word_type, result);
783 else
785 /* Use a single scalar operation with a mode no wider than word_mode. */
786 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
787 compute_type = lang_hooks.types.type_for_mode (mode, 1);
788 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
791 return result;
794 /* Expand a vector operation to scalars; for integer types we can use
795 special bit twiddling tricks to do the sums a word at a time, using
796 function F_PARALLEL instead of F. These tricks are done only if
797 they can process at least four items, that is, only if the vector
798 holds at least four items and if a word can hold four items. */
799 static tree
800 expand_vector_addition (block_stmt_iterator *bsi,
801 elem_op_func f, elem_op_func f_parallel,
802 tree type, tree a, tree b, enum tree_code code)
804 int parts_per_word = UNITS_PER_WORD
805 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
807 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
808 && parts_per_word >= 4
809 && TYPE_VECTOR_SUBPARTS (type) >= 4)
810 return expand_vector_parallel (bsi, f_parallel,
811 type, a, b, code);
812 else
813 return expand_vector_piecewise (bsi, f,
814 type, TREE_TYPE (type),
815 a, b, code);
818 static tree
819 expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
820 tree rhs, enum tree_code code)
822 enum machine_mode compute_mode = TYPE_MODE (compute_type);
824 /* If the compute mode is not a vector mode (hence we are not decomposing
825 a BLKmode vector to smaller, hardware-supported vectors), we may want
826 to expand the operations in parallel. */
827 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
828 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
829 switch (code)
831 case PLUS_EXPR:
832 case MINUS_EXPR:
833 if (!TYPE_TRAP_SIGNED (type))
834 return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
835 TREE_OPERAND (rhs, 0),
836 TREE_OPERAND (rhs, 1), code);
837 break;
839 case NEGATE_EXPR:
840 if (!TYPE_TRAP_SIGNED (type))
841 return expand_vector_addition (bsi, do_unop, do_negate, type,
842 TREE_OPERAND (rhs, 0),
843 NULL_TREE, code);
844 break;
846 case BIT_AND_EXPR:
847 case BIT_IOR_EXPR:
848 case BIT_XOR_EXPR:
849 return expand_vector_parallel (bsi, do_binop, type,
850 TREE_OPERAND (rhs, 0),
851 TREE_OPERAND (rhs, 1), code);
853 case BIT_NOT_EXPR:
854 return expand_vector_parallel (bsi, do_unop, type,
855 TREE_OPERAND (rhs, 0),
856 NULL_TREE, code);
858 default:
859 break;
862 if (TREE_CODE_CLASS (code) == tcc_unary)
863 return expand_vector_piecewise (bsi, do_unop, type, compute_type,
864 TREE_OPERAND (rhs, 0),
865 NULL_TREE, code);
866 else
867 return expand_vector_piecewise (bsi, do_binop, type, compute_type,
868 TREE_OPERAND (rhs, 0),
869 TREE_OPERAND (rhs, 1), code);
872 /* Return a type for the widest vector mode whose components are of mode
873 INNER_MODE, or NULL_TREE if none is found. */
874 static tree
875 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
877 enum machine_mode best_mode = VOIDmode, mode;
878 int best_nunits = 0;
880 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
881 mode = MIN_MODE_VECTOR_FLOAT;
882 else
883 mode = MIN_MODE_VECTOR_INT;
885 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
886 if (GET_MODE_INNER (mode) == inner_mode
887 && GET_MODE_NUNITS (mode) > best_nunits
888 && op->handlers[mode].insn_code != CODE_FOR_nothing)
889 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
891 if (best_mode == VOIDmode)
892 return NULL_TREE;
893 else
894 return lang_hooks.types.type_for_mode (best_mode, 1);
897 /* Process one statement. If we identify a vector operation, expand it. */
899 static void
900 expand_vector_operations_1 (block_stmt_iterator *bsi)
902 tree stmt = bsi_stmt (*bsi);
903 tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
904 enum tree_code code;
905 enum machine_mode compute_mode;
906 optab op;
908 switch (TREE_CODE (stmt))
910 case RETURN_EXPR:
911 stmt = TREE_OPERAND (stmt, 0);
912 if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
913 return;
915 /* FALLTHRU */
917 case MODIFY_EXPR:
918 p_lhs = &TREE_OPERAND (stmt, 0);
919 p_rhs = &TREE_OPERAND (stmt, 1);
920 lhs = *p_lhs;
921 rhs = *p_rhs;
922 break;
924 default:
925 return;
928 type = TREE_TYPE (rhs);
929 if (TREE_CODE (type) != VECTOR_TYPE)
930 return;
932 code = TREE_CODE (rhs);
933 if (TREE_CODE_CLASS (code) != tcc_unary
934 && TREE_CODE_CLASS (code) != tcc_binary)
935 return;
937 if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
938 return;
940 gcc_assert (code != CONVERT_EXPR);
941 op = optab_for_tree_code (code, type);
943 /* Optabs will try converting a negation into a subtraction, so
944 look for it as well. TODO: negation of floating-point vectors
945 might be turned into an exclusive OR toggling the sign bit. */
946 if (op == NULL
947 && code == NEGATE_EXPR
948 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
949 op = optab_for_tree_code (MINUS_EXPR, type);
951 /* For very wide vectors, try using a smaller vector mode. */
952 compute_type = type;
953 if (TYPE_MODE (type) == BLKmode && op)
955 tree vector_compute_type
956 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
957 if (vector_compute_type != NULL_TREE)
958 compute_type = vector_compute_type;
961 /* If we are breaking a BLKmode vector into smaller pieces,
962 type_for_widest_vector_mode has already looked into the optab,
963 so skip these checks. */
964 if (compute_type == type)
966 compute_mode = TYPE_MODE (compute_type);
967 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
968 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
969 && op != NULL
970 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
971 return;
972 else
973 /* There is no operation in hardware, so fall back to scalars. */
974 compute_type = TREE_TYPE (type);
977 rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
978 if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
979 *p_rhs = rhs;
980 else
982 /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
983 be stored in memory anyway, so prefer NOP_EXPR. We should also try
984 performing the VIEW_CONVERT_EXPR on the left side of the
985 assignment. */
986 if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode)
987 *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs);
988 else
989 *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs);
992 mark_stmt_modified (bsi_stmt (*bsi));
995 /* Use this to lower vector operations introduced by the vectorizer,
996 if it may need the bit-twiddling tricks implemented in this file. */
998 static bool
999 gate_expand_vector_operations (void)
1001 return flag_tree_vectorize != 0;
1004 static void
1005 expand_vector_operations (void)
1007 block_stmt_iterator bsi;
1008 basic_block bb;
1010 FOR_EACH_BB (bb)
1012 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1014 expand_vector_operations_1 (&bsi);
1015 update_stmt_if_modified (bsi_stmt (bsi));
1020 static void
1021 tree_lower_operations (void)
1023 int old_last_basic_block = last_basic_block;
1024 block_stmt_iterator bsi;
1025 basic_block bb;
1027 FOR_EACH_BB (bb)
1029 if (bb->index >= old_last_basic_block)
1030 continue;
1031 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1033 expand_complex_operations_1 (&bsi);
1034 expand_vector_operations_1 (&bsi);
1040 struct tree_opt_pass pass_lower_vector_ssa =
1042 "veclower", /* name */
1043 gate_expand_vector_operations, /* gate */
1044 expand_vector_operations, /* execute */
1045 NULL, /* sub */
1046 NULL, /* next */
1047 0, /* static_pass_number */
1048 0, /* tv_id */
1049 PROP_cfg, /* properties_required */
1050 0, /* properties_provided */
1051 0, /* properties_destroyed */
1052 0, /* todo_flags_start */
1053 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */
1054 | TODO_ggc_collect | TODO_verify_ssa
1055 | TODO_verify_stmts | TODO_verify_flow,
1056 0 /* letter */
1059 struct tree_opt_pass pass_pre_expand =
1061 "oplower", /* name */
1062 0, /* gate */
1063 tree_lower_operations, /* execute */
1064 NULL, /* sub */
1065 NULL, /* next */
1066 0, /* static_pass_number */
1067 0, /* tv_id */
1068 PROP_cfg, /* properties_required */
1069 0, /* properties_provided */
1070 0, /* properties_destroyed */
1071 0, /* todo_flags_start */
1072 TODO_dump_func | TODO_ggc_collect
1073 | TODO_verify_stmts, /* todo_flags_finish */
1074 0 /* letter */
1077 #include "gt-tree-complex.h"