/* Lower complex number and vector operations to scalar operations.
   Copyright (C) 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
23 #include "coretypes.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
40 /* Extract the real or imaginary part of a complex variable or constant.
41 Make sure that it's a proper gimple_val and gimplify it if not.
42 Emit any new code before BSI. */
45 extract_component (block_stmt_iterator
*bsi
, tree t
, bool imagpart_p
)
49 inner_type
= TREE_TYPE (TREE_TYPE (t
));
50 switch (TREE_CODE (t
))
53 ret
= (imagpart_p
? TREE_IMAGPART (t
) : TREE_REALPART (t
));
57 ret
= TREE_OPERAND (t
, imagpart_p
);
62 ret
= build1 ((imagpart_p
? IMAGPART_EXPR
: REALPART_EXPR
),
70 return gimplify_val (bsi
, inner_type
, ret
);
73 /* Update an assignment to a complex variable in place. */
76 update_complex_assignment (block_stmt_iterator
*bsi
, tree r
, tree i
)
78 tree stmt
= bsi_stmt (*bsi
);
82 if (TREE_CODE (stmt
) == RETURN_EXPR
)
83 stmt
= TREE_OPERAND (stmt
, 0);
85 type
= TREE_TYPE (TREE_OPERAND (stmt
, 1));
86 TREE_OPERAND (stmt
, 1) = build (COMPLEX_EXPR
, type
, r
, i
);
89 /* Expand complex addition to scalars:
90 a + b = (ar + br) + i(ai + bi)
91 a - b = (ar - br) + i(ai + bi)
95 expand_complex_addition (block_stmt_iterator
*bsi
, tree inner_type
,
96 tree ar
, tree ai
, tree br
, tree bi
,
101 rr
= gimplify_build2 (bsi
, code
, inner_type
, ar
, br
);
102 ri
= gimplify_build2 (bsi
, code
, inner_type
, ai
, bi
);
104 update_complex_assignment (bsi
, rr
, ri
);
107 /* Expand complex multiplication to scalars:
108 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
112 expand_complex_multiplication (block_stmt_iterator
*bsi
, tree inner_type
,
113 tree ar
, tree ai
, tree br
, tree bi
)
115 tree t1
, t2
, t3
, t4
, rr
, ri
;
117 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
118 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
119 t3
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
121 /* Avoid expanding redundant multiplication for the common
122 case of squaring a complex number. */
123 if (ar
== br
&& ai
== bi
)
126 t4
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
128 rr
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
129 ri
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t3
, t4
);
131 update_complex_assignment (bsi
, rr
, ri
);
134 /* Expand complex division to scalars, straightforward algorithm.
135 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
140 expand_complex_div_straight (block_stmt_iterator
*bsi
, tree inner_type
,
141 tree ar
, tree ai
, tree br
, tree bi
,
144 tree rr
, ri
, div
, t1
, t2
, t3
;
146 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, br
, br
);
147 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, bi
, bi
);
148 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
150 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, br
);
151 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, bi
);
152 t3
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, t2
);
153 rr
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
155 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, br
);
156 t2
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, bi
);
157 t3
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, t1
, t2
);
158 ri
= gimplify_build2 (bsi
, code
, inner_type
, t3
, div
);
160 update_complex_assignment (bsi
, rr
, ri
);
163 /* Expand complex division to scalars, modified algorithm to minimize
164 overflow with wide input ranges. */
167 expand_complex_div_wide (block_stmt_iterator
*bsi
, tree inner_type
,
168 tree ar
, tree ai
, tree br
, tree bi
,
171 tree rr
, ri
, ratio
, div
, t1
, t2
, min
, max
, cond
;
173 /* Examine |br| < |bi|, and branch. */
174 t1
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, br
);
175 t2
= gimplify_build1 (bsi
, ABS_EXPR
, inner_type
, bi
);
176 cond
= fold (build (LT_EXPR
, boolean_type_node
, t1
, t2
));
179 if (TREE_CONSTANT (cond
))
181 if (integer_zerop (cond
))
188 basic_block bb_cond
, bb_true
, bb_false
, bb_join
;
192 l1
= create_artificial_label ();
193 t1
= build (GOTO_EXPR
, void_type_node
, l1
);
194 l2
= create_artificial_label ();
195 t2
= build (GOTO_EXPR
, void_type_node
, l2
);
196 cond
= build (COND_EXPR
, void_type_node
, cond
, t1
, t2
);
197 bsi_insert_before (bsi
, cond
, BSI_SAME_STMT
);
199 min
= make_rename_temp (inner_type
, NULL
);
200 max
= make_rename_temp (inner_type
, NULL
);
201 l3
= create_artificial_label ();
203 /* Split the original block, and create the TRUE and FALSE blocks. */
204 e
= split_block (bsi
->bb
, cond
);
207 bb_true
= create_empty_bb (bb_cond
);
208 bb_false
= create_empty_bb (bb_true
);
210 /* Wire the blocks together. */
211 e
->flags
= EDGE_TRUE_VALUE
;
212 redirect_edge_succ (e
, bb_true
);
213 make_edge (bb_cond
, bb_false
, EDGE_FALSE_VALUE
);
214 make_edge (bb_true
, bb_join
, 0);
215 make_edge (bb_false
, bb_join
, 0);
217 /* Update dominance info. Note that bb_join's data was
218 updated by split_block. */
219 if (dom_computed
[CDI_DOMINATORS
] >= DOM_CONS_OK
)
221 set_immediate_dominator (CDI_DOMINATORS
, bb_true
, bb_cond
);
222 set_immediate_dominator (CDI_DOMINATORS
, bb_false
, bb_cond
);
225 /* Compute min and max for TRUE block. */
226 *bsi
= bsi_start (bb_true
);
227 t1
= build (LABEL_EXPR
, void_type_node
, l1
);
228 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
229 t1
= build (MODIFY_EXPR
, inner_type
, min
, br
);
230 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
231 t1
= build (MODIFY_EXPR
, inner_type
, max
, bi
);
232 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
234 /* Compute min and max for FALSE block. */
235 *bsi
= bsi_start (bb_false
);
236 t1
= build (LABEL_EXPR
, void_type_node
, l2
);
237 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
238 t1
= build (MODIFY_EXPR
, inner_type
, min
, bi
);
239 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
240 t1
= build (MODIFY_EXPR
, inner_type
, max
, br
);
241 bsi_insert_after (bsi
, t1
, BSI_NEW_STMT
);
243 /* Insert the join label into the tail of the original block. */
244 *bsi
= bsi_start (bb_join
);
245 t1
= build (LABEL_EXPR
, void_type_node
, l3
);
246 bsi_insert_before (bsi
, t1
, BSI_SAME_STMT
);
249 /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|). We now use the
250 ratio min/max to scale both the dividend and divisor. */
251 ratio
= gimplify_build2 (bsi
, code
, inner_type
, min
, max
);
253 /* Calculate the divisor: min*ratio + max. */
254 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, min
, ratio
);
255 div
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, t1
, max
);
257 /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div). */
258 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ai
, ratio
);
259 t2
= gimplify_build2 (bsi
, PLUS_EXPR
, inner_type
, ar
, t1
);
260 rr
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
262 t1
= gimplify_build2 (bsi
, MULT_EXPR
, inner_type
, ar
, ratio
);
263 t2
= gimplify_build2 (bsi
, MINUS_EXPR
, inner_type
, ai
, t1
);
264 ri
= gimplify_build2 (bsi
, code
, inner_type
, t2
, div
);
266 update_complex_assignment (bsi
, rr
, ri
);
269 /* Expand complex division to scalars. */
272 expand_complex_division (block_stmt_iterator
*bsi
, tree inner_type
,
273 tree ar
, tree ai
, tree br
, tree bi
,
276 switch (flag_complex_divide_method
)
279 /* straightforward implementation of complex divide acceptable. */
280 expand_complex_div_straight (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
283 /* wide ranges of inputs must work for complex divide. */
284 expand_complex_div_wide (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
287 /* C99-like requirements for complex divide (not yet implemented). */
292 /* Expand complex negation to scalars:
297 expand_complex_negation (block_stmt_iterator
*bsi
, tree inner_type
,
302 rr
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ar
);
303 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
305 update_complex_assignment (bsi
, rr
, ri
);
308 /* Expand complex conjugate to scalars:
313 expand_complex_conjugate (block_stmt_iterator
*bsi
, tree inner_type
,
318 ri
= gimplify_build1 (bsi
, NEGATE_EXPR
, inner_type
, ai
);
320 update_complex_assignment (bsi
, ar
, ri
);
323 /* Expand complex comparison (EQ or NE only). */
326 expand_complex_comparison (block_stmt_iterator
*bsi
, tree ar
, tree ai
,
327 tree br
, tree bi
, enum tree_code code
)
329 tree cr
, ci
, cc
, stmt
, type
;
331 cr
= gimplify_build2 (bsi
, code
, boolean_type_node
, ar
, br
);
332 ci
= gimplify_build2 (bsi
, code
, boolean_type_node
, ai
, bi
);
333 cc
= gimplify_build2 (bsi
,
334 (code
== EQ_EXPR
? TRUTH_AND_EXPR
: TRUTH_OR_EXPR
),
335 boolean_type_node
, cr
, ci
);
337 stmt
= bsi_stmt (*bsi
);
340 switch (TREE_CODE (stmt
))
343 stmt
= TREE_OPERAND (stmt
, 0);
346 type
= TREE_TYPE (TREE_OPERAND (stmt
, 1));
347 TREE_OPERAND (stmt
, 1) = fold_convert (type
, cc
);
350 TREE_OPERAND (stmt
, 0) = cc
;
357 /* Process one statement. If we identify a complex operation, expand it. */
360 expand_complex_operations_1 (block_stmt_iterator
*bsi
)
362 tree stmt
= bsi_stmt (*bsi
);
363 tree rhs
, type
, inner_type
;
364 tree ac
, ar
, ai
, bc
, br
, bi
;
367 switch (TREE_CODE (stmt
))
370 stmt
= TREE_OPERAND (stmt
, 0);
373 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
378 rhs
= TREE_OPERAND (stmt
, 1);
382 rhs
= TREE_OPERAND (stmt
, 0);
389 type
= TREE_TYPE (rhs
);
390 code
= TREE_CODE (rhs
);
392 /* Initial filter for operations we handle. */
405 if (TREE_CODE (type
) != COMPLEX_TYPE
)
407 inner_type
= TREE_TYPE (type
);
412 inner_type
= TREE_TYPE (TREE_OPERAND (rhs
, 1));
413 if (TREE_CODE (inner_type
) != COMPLEX_TYPE
)
421 /* Extract the components of the two complex values. Make sure and
422 handle the common case of the same value used twice specially. */
423 ac
= TREE_OPERAND (rhs
, 0);
424 ar
= extract_component (bsi
, ac
, 0);
425 ai
= extract_component (bsi
, ac
, 1);
427 if (TREE_CODE_CLASS (code
) == '1')
431 bc
= TREE_OPERAND (rhs
, 1);
436 br
= extract_component (bsi
, bc
, 0);
437 bi
= extract_component (bsi
, bc
, 1);
445 expand_complex_addition (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
449 expand_complex_multiplication (bsi
, inner_type
, ar
, ai
, br
, bi
);
457 expand_complex_division (bsi
, inner_type
, ar
, ai
, br
, bi
, code
);
461 expand_complex_negation (bsi
, inner_type
, ar
, ai
);
465 expand_complex_conjugate (bsi
, inner_type
, ar
, ai
);
470 expand_complex_comparison (bsi
, ar
, ai
, br
, bi
, code
);
478 /* Build a constant of type TYPE, made of VALUE's bits replicated
479 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
481 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
483 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
484 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
485 unsigned HOST_WIDE_INT low
, high
, mask
;
491 if (width
== HOST_BITS_PER_WIDE_INT
)
495 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
496 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
499 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
500 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
501 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
503 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
508 ret
= build_int_2 (low
, high
);
509 TREE_TYPE (ret
) = type
;
513 /* Return a suitable vector types made of SUBPARTS units each of mode
514 "word_mode" (the global variable). */
516 build_word_mode_vector_type (int nunits
)
518 static tree innertype
;
520 static int last_nunits
;
523 innertype
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
524 else if (last_nunits
== nunits
)
527 /* We build a new type, but we canonicalize it nevertheless,
528 because it still saves some memory. */
529 last_nunits
= nunits
;
530 last
= type_hash_canon (nunits
, build_vector_type (innertype
, nunits
));
534 typedef tree (*elem_op_func
) (block_stmt_iterator
*,
535 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
538 tree_vec_extract (block_stmt_iterator
*bsi
, tree type
,
539 tree t
, tree bitsize
, tree bitpos
)
542 return gimplify_build3 (bsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
544 return gimplify_build1 (bsi
, VIEW_CONVERT_EXPR
, type
, t
);
548 do_unop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
,
549 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
552 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
553 return gimplify_build1 (bsi
, code
, inner_type
, a
);
557 do_binop (block_stmt_iterator
*bsi
, tree inner_type
, tree a
, tree b
,
558 tree bitpos
, tree bitsize
, enum tree_code code
)
560 a
= tree_vec_extract (bsi
, inner_type
, a
, bitsize
, bitpos
);
561 b
= tree_vec_extract (bsi
, inner_type
, b
, bitsize
, bitpos
);
562 return gimplify_build2 (bsi
, code
, inner_type
, a
, b
);
565 /* Expand vector addition to scalars. This does bit twiddling
566 in order to increase parallelism:
568 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
571 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
572 (a ^ ~b) & 0x80808080
574 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
576 This optimization should be done only if 4 vector items or more
579 do_plus_minus (block_stmt_iterator
*bsi
, tree word_type
, tree a
, tree b
,
580 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
583 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
584 unsigned HOST_WIDE_INT max
;
585 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
587 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
588 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
589 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
591 a
= tree_vec_extract (bsi
, word_type
, a
, bitsize
, bitpos
);
592 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
594 signs
= gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
595 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
596 if (code
== PLUS_EXPR
)
597 a_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
600 a_low
= gimplify_build2 (bsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
601 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, signs
);
604 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
605 result_low
= gimplify_build2 (bsi
, code
, word_type
, a_low
, b_low
);
606 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
610 do_negate (block_stmt_iterator
*bsi
, tree word_type
, tree b
,
611 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
612 tree bitsize ATTRIBUTE_UNUSED
,
613 enum tree_code code ATTRIBUTE_UNUSED
)
615 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
617 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
619 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
620 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
621 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
623 b
= tree_vec_extract (bsi
, word_type
, b
, bitsize
, bitpos
);
625 b_low
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
626 signs
= gimplify_build1 (bsi
, BIT_NOT_EXPR
, word_type
, b
);
627 signs
= gimplify_build2 (bsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
628 result_low
= gimplify_build2 (bsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
629 return gimplify_build2 (bsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
632 /* Expand a vector operation to scalars, by using many operations
633 whose type is the vector type's inner type. */
635 expand_vector_piecewise (block_stmt_iterator
*bsi
, elem_op_func f
,
636 tree type
, tree inner_type
,
637 tree a
, tree b
, enum tree_code code
)
639 tree head
, *chain
= &head
;
640 tree part_width
= TYPE_SIZE (inner_type
);
641 tree index
= bitsize_int (0);
642 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
643 int delta
= tree_low_cst (part_width
, 1)
644 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
647 for (i
= 0; i
< nunits
;
648 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
, 0))
650 tree result
= f (bsi
, inner_type
, a
, b
, index
, part_width
, code
);
651 *chain
= tree_cons (NULL_TREE
, result
, NULL_TREE
);
652 chain
= &TREE_CHAIN (*chain
);
655 return build1 (CONSTRUCTOR
, type
, head
);
658 /* Expand a vector operation to scalars with the freedom to use
659 a scalar integer type, or to use a different size for the items
660 in the vector type. */
662 expand_vector_parallel (block_stmt_iterator
*bsi
, elem_op_func f
, tree type
,
666 tree result
, compute_type
;
667 enum machine_mode mode
;
668 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
670 /* We have three strategies. If the type is already correct, just do
671 the operation an element at a time. Else, if the vector is wider than
672 one word, do it a word at a time; finally, if the vector is smaller
673 than one word, do it as a scalar. */
674 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
675 return expand_vector_piecewise (bsi
, f
,
676 type
, TREE_TYPE (type
),
678 else if (n_words
> 1)
680 tree word_type
= build_word_mode_vector_type (n_words
);
681 result
= expand_vector_piecewise (bsi
, f
,
682 word_type
, TREE_TYPE (word_type
),
684 result
= gimplify_val (bsi
, word_type
, result
);
688 /* Use a single scalar operation with a mode no wider than word_mode. */
689 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
690 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
691 result
= f (bsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
694 return build1 (VIEW_CONVERT_EXPR
, type
, result
);
697 /* Expand a vector operation to scalars; for integer types we can use
698 special bit twiddling tricks to do the sums a word at a time, using
699 function F_PARALLEL instead of F. These tricks are done only if
700 they can process at least four items, that is, only if the vector
701 holds at least four items and if a word can hold four items. */
703 expand_vector_addition (block_stmt_iterator
*bsi
,
704 elem_op_func f
, elem_op_func f_parallel
,
705 tree type
, tree a
, tree b
, enum tree_code code
)
707 int parts_per_word
= UNITS_PER_WORD
708 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
710 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
711 && parts_per_word
>= 4
712 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
713 return expand_vector_parallel (bsi
, f_parallel
,
716 return expand_vector_piecewise (bsi
, f
,
717 type
, TREE_TYPE (type
),
721 /* Return a type for the widest vector mode whose components are of mode
722 INNER_MODE, or NULL_TREE if none is found. */
724 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
)
726 enum machine_mode best_mode
= VOIDmode
, mode
;
729 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
730 mode
= MIN_MODE_VECTOR_FLOAT
;
732 mode
= MIN_MODE_VECTOR_INT
;
734 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
735 if (GET_MODE_INNER (mode
) == inner_mode
736 && GET_MODE_NUNITS (mode
) > best_nunits
737 && op
->handlers
[mode
].insn_code
!= CODE_FOR_nothing
)
738 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
740 if (best_mode
== VOIDmode
)
743 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
746 /* Process one statement. If we identify a vector operation, expand it. */
749 expand_vector_operations_1 (block_stmt_iterator
*bsi
)
751 tree stmt
= bsi_stmt (*bsi
);
752 tree
*p_rhs
, rhs
, type
, compute_type
;
754 enum machine_mode compute_mode
;
757 switch (TREE_CODE (stmt
))
760 stmt
= TREE_OPERAND (stmt
, 0);
761 if (!stmt
|| TREE_CODE (stmt
) != MODIFY_EXPR
)
767 p_rhs
= &TREE_OPERAND (stmt
, 1);
775 type
= TREE_TYPE (rhs
);
776 if (TREE_CODE (type
) != VECTOR_TYPE
)
779 code
= TREE_CODE (rhs
);
780 if (TREE_CODE_CLASS (code
) != '1'
781 && TREE_CODE_CLASS (code
) != '2')
784 if (code
== NOP_EXPR
|| code
== VIEW_CONVERT_EXPR
)
787 if (code
== CONVERT_EXPR
)
790 op
= optab_for_tree_code (code
, type
);
792 /* Optabs will try converting a negation into a subtraction, so
793 look for it as well. TODO: negation of floating-point vectors
794 might be turned into an exclusive OR toggling the sign bit. */
796 && code
== NEGATE_EXPR
797 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
798 op
= optab_for_tree_code (MINUS_EXPR
, type
);
800 /* For very wide vectors, try using a smaller vector mode. */
802 if (TYPE_MODE (type
) == BLKmode
&& op
)
804 tree vector_compute_type
805 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
);
806 if (vector_compute_type
!= NULL_TREE
)
807 compute_type
= vector_compute_type
;
810 compute_mode
= TYPE_MODE (compute_type
);
812 /* If we are breaking a BLKmode vector into smaller pieces,
813 type_for_widest_vector_mode has already looked into the optab,
814 so skip these checks. */
815 if (compute_type
== type
)
817 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
818 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
)
820 && op
->handlers
[compute_mode
].insn_code
!= CODE_FOR_nothing
)
824 /* There is no operation in hardware, so fall back to scalars. */
825 compute_type
= TREE_TYPE (type
);
826 compute_mode
= TYPE_MODE (compute_type
);
830 /* If the compute mode is not a vector mode (hence we are decomposing
831 a BLKmode vector to smaller, hardware-supported vectors), we may
832 want to expand the operations in parallel. */
833 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
834 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
)
839 if (TYPE_TRAP_SIGNED (type
))
842 *p_rhs
= expand_vector_addition (bsi
, do_binop
, do_plus_minus
, type
,
843 TREE_OPERAND (rhs
, 0),
844 TREE_OPERAND (rhs
, 1), code
);
845 modify_stmt (bsi_stmt (*bsi
));
849 if (TYPE_TRAP_SIGNED (type
))
852 *p_rhs
= expand_vector_addition (bsi
, do_unop
, do_negate
, type
,
853 TREE_OPERAND (rhs
, 0),
855 modify_stmt (bsi_stmt (*bsi
));
861 *p_rhs
= expand_vector_parallel (bsi
, do_binop
, type
,
862 TREE_OPERAND (rhs
, 0),
863 TREE_OPERAND (rhs
, 1), code
);
864 modify_stmt (bsi_stmt (*bsi
));
868 *p_rhs
= expand_vector_parallel (bsi
, do_unop
, type
,
869 TREE_OPERAND (rhs
, 0),
871 modify_stmt (bsi_stmt (*bsi
));
878 if (TREE_CODE_CLASS (code
) == '1')
879 *p_rhs
= expand_vector_piecewise (bsi
, do_unop
, type
, compute_type
,
880 TREE_OPERAND (rhs
, 0),
883 *p_rhs
= expand_vector_piecewise (bsi
, do_binop
, type
, compute_type
,
884 TREE_OPERAND (rhs
, 0),
885 TREE_OPERAND (rhs
, 1), code
);
887 modify_stmt (bsi_stmt (*bsi
));
891 expand_vector_operations (void)
893 block_stmt_iterator bsi
;
898 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
899 expand_vector_operations_1 (&bsi
);
904 tree_lower_operations (void)
906 int old_last_basic_block
= last_basic_block
;
907 block_stmt_iterator bsi
;
912 if (bb
->index
>= old_last_basic_block
)
914 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
916 expand_complex_operations_1 (&bsi
);
917 expand_vector_operations_1 (&bsi
);
923 struct tree_opt_pass pass_lower_vector_ssa
=
927 expand_vector_operations
, /* execute */
930 0, /* static_pass_number */
932 PROP_cfg
, /* properties_required */
933 0, /* properties_provided */
934 0, /* properties_destroyed */
935 0, /* todo_flags_start */
936 TODO_dump_func
| TODO_rename_vars
/* todo_flags_finish */
937 | TODO_ggc_collect
| TODO_verify_ssa
938 | TODO_verify_stmts
| TODO_verify_flow
941 struct tree_opt_pass pass_pre_expand
=
943 "oplower", /* name */
945 tree_lower_operations
, /* execute */
948 0, /* static_pass_number */
950 PROP_cfg
, /* properties_required */
951 0, /* properties_provided */
952 0, /* properties_destroyed */
953 0, /* todo_flags_start */
954 TODO_dump_func
| TODO_ggc_collect
955 | TODO_verify_stmts
/* todo_flags_finish */