PR target/16286
[official-gcc.git] / gcc / tree-complex.c
blobbfd74bdc8002f0d22b807463c1c24a08378d79e2
1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
40 /* Extract the real or imaginary part of a complex variable or constant.
41 Make sure that it's a proper gimple_val and gimplify it if not.
42 Emit any new code before BSI. */
44 static tree
45 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
47 tree ret, inner_type;
49 inner_type = TREE_TYPE (TREE_TYPE (t));
50 switch (TREE_CODE (t))
52 case COMPLEX_CST:
53 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
54 break;
56 case COMPLEX_EXPR:
57 ret = TREE_OPERAND (t, imagpart_p);
58 break;
60 case VAR_DECL:
61 case PARM_DECL:
62 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
63 inner_type, t);
64 break;
66 default:
67 gcc_unreachable ();
70 return gimplify_val (bsi, inner_type, ret);
73 /* Update an assignment to a complex variable in place. */
75 static void
76 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
78 tree stmt = bsi_stmt (*bsi);
79 tree type;
81 if (TREE_CODE (stmt) == RETURN_EXPR)
82 stmt = TREE_OPERAND (stmt, 0);
84 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
85 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
86 modify_stmt (stmt);
89 /* Expand complex addition to scalars:
90 a + b = (ar + br) + i(ai + bi)
91 a - b = (ar - br) + i(ai + bi)
94 static void
95 expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
96 tree ar, tree ai, tree br, tree bi,
97 enum tree_code code)
99 tree rr, ri;
101 rr = gimplify_build2 (bsi, code, inner_type, ar, br);
102 ri = gimplify_build2 (bsi, code, inner_type, ai, bi);
104 update_complex_assignment (bsi, rr, ri);
107 /* Expand complex multiplication to scalars:
108 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
111 static void
112 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
113 tree ar, tree ai, tree br, tree bi)
115 tree t1, t2, t3, t4, rr, ri;
117 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
118 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
119 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
121 /* Avoid expanding redundant multiplication for the common
122 case of squaring a complex number. */
123 if (ar == br && ai == bi)
124 t4 = t3;
125 else
126 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
128 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
129 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
131 update_complex_assignment (bsi, rr, ri);
134 /* Expand complex division to scalars, straightforward algorithm.
135 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
136 t = br*br + bi*bi
139 static void
140 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
141 tree ar, tree ai, tree br, tree bi,
142 enum tree_code code)
144 tree rr, ri, div, t1, t2, t3;
146 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
147 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
148 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
150 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
151 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
152 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
153 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
155 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
156 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
157 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
158 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
160 update_complex_assignment (bsi, rr, ri);
/* Expand complex division to scalars, modified algorithm to minimize
   overflow with wide input ranges (Smith's algorithm): scale numerator
   and denominator by the ratio min(|br|,|bi|) / max(|br|,|bi|).  */

static void
expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  tree rr, ri, ratio, div, t1, t2, min, max, cond;

  /* Examine |br| < |bi|, and branch.  */
  t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
  t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
  cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
  STRIP_NOPS (cond);

  if (TREE_CONSTANT (cond))
    {
      /* The comparison folded to a constant, so MIN and MAX can be
	 selected at compile time with no control flow.  */
      if (integer_zerop (cond))
	min = bi, max = br;
      else
	min = br, max = bi;
    }
  else
    {
      /* Otherwise build a run-time diamond (cond / true / false / join)
	 whose two arms assign the MIN and MAX temporaries.  */
      basic_block bb_cond, bb_true, bb_false, bb_join;
      tree l1, l2, l3;
      edge e;

      l1 = create_artificial_label ();
      t1 = build (GOTO_EXPR, void_type_node, l1);
      l2 = create_artificial_label ();
      t2 = build (GOTO_EXPR, void_type_node, l2);
      cond = build (COND_EXPR, void_type_node, cond, t1, t2);
      bsi_insert_before (bsi, cond, BSI_SAME_STMT);

      min = make_rename_temp (inner_type, NULL);
      max = make_rename_temp (inner_type, NULL);
      l3 = create_artificial_label ();

      /* Split the original block, and create the TRUE and FALSE blocks.  */
      e = split_block (bsi->bb, cond);
      bb_cond = e->src;
      bb_join = e->dest;
      bb_true = create_empty_bb (bb_cond);
      bb_false = create_empty_bb (bb_true);

      /* Wire the blocks together.  */
      e->flags = EDGE_TRUE_VALUE;
      redirect_edge_succ (e, bb_true);
      make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
      make_edge (bb_true, bb_join, 0);
      make_edge (bb_false, bb_join, 0);

      /* Update dominance info.  Note that bb_join's data was
	 updated by split_block.  */
      if (dom_info_available_p (CDI_DOMINATORS))
	{
	  set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
	  set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
	}

      /* Compute min and max for TRUE block (|br| < |bi| held).  */
      *bsi = bsi_start (bb_true);
      t1 = build (LABEL_EXPR, void_type_node, l1);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, min, br);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, max, bi);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);

      /* Compute min and max for FALSE block.  */
      *bsi = bsi_start (bb_false);
      t1 = build (LABEL_EXPR, void_type_node, l2);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, min, bi);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);
      t1 = build (MODIFY_EXPR, inner_type, max, br);
      bsi_insert_after (bsi, t1, BSI_NEW_STMT);

      /* Insert the join label into the tail of the original block;
	 BSI is left there so the rest of the expansion is emitted
	 after the diamond.  */
      *bsi = bsi_start (bb_join);
      t1 = build (LABEL_EXPR, void_type_node, l3);
      bsi_insert_before (bsi, t1, BSI_SAME_STMT);
    }

  /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|).  We now use the
     ratio min/max to scale both the dividend and divisor.  */
  ratio = gimplify_build2 (bsi, code, inner_type, min, max);

  /* Calculate the divisor: min*ratio + max.  */
  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, min, ratio);
  div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, max);

  /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div).  */
  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
  t2 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, ar, t1);
  rr = gimplify_build2 (bsi, code, inner_type, t2, div);

  t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
  t2 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
  ri = gimplify_build2 (bsi, code, inner_type, t2, div);

  update_complex_assignment (bsi, rr, ri);
}
269 /* Expand complex division to scalars. */
271 static void
272 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
273 tree ar, tree ai, tree br, tree bi,
274 enum tree_code code)
276 switch (flag_complex_divide_method)
278 case 0:
279 /* straightforward implementation of complex divide acceptable. */
280 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
281 break;
282 case 1:
283 /* wide ranges of inputs must work for complex divide. */
284 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
285 break;
286 default:
287 /* C99-like requirements for complex divide (not yet implemented). */
288 gcc_unreachable ();
292 /* Expand complex negation to scalars:
293 -a = (-ar) + i(-ai)
296 static void
297 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
298 tree ar, tree ai)
300 tree rr, ri;
302 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
303 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
305 update_complex_assignment (bsi, rr, ri);
308 /* Expand complex conjugate to scalars:
309 ~a = (ar) + i(-ai)
312 static void
313 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
314 tree ar, tree ai)
316 tree ri;
318 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
320 update_complex_assignment (bsi, ar, ri);
323 /* Expand complex comparison (EQ or NE only). */
325 static void
326 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
327 tree br, tree bi, enum tree_code code)
329 tree cr, ci, cc, stmt, expr, type;
331 cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
332 ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
333 cc = gimplify_build2 (bsi,
334 (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
335 boolean_type_node, cr, ci);
337 stmt = expr = bsi_stmt (*bsi);
339 switch (TREE_CODE (stmt))
341 case RETURN_EXPR:
342 expr = TREE_OPERAND (stmt, 0);
343 /* FALLTHRU */
344 case MODIFY_EXPR:
345 type = TREE_TYPE (TREE_OPERAND (expr, 1));
346 TREE_OPERAND (expr, 1) = fold_convert (type, cc);
347 break;
348 case COND_EXPR:
349 TREE_OPERAND (stmt, 0) = cc;
350 break;
351 default:
352 gcc_unreachable ();
355 modify_stmt (stmt);
/* Process one statement.  If we identify a complex operation, expand it.  */

static void
expand_complex_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree rhs, type, inner_type;
  tree ac, ar, ai, bc, br, bi;
  enum tree_code code;

  /* Find the expression to examine: the RHS of an assignment (possibly
     wrapped in a RETURN_EXPR) or the predicate of a COND_EXPR.  */
  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt)
	return;
      if (TREE_CODE (stmt) != MODIFY_EXPR)
	return;
      /* FALLTHRU */

    case MODIFY_EXPR:
      rhs = TREE_OPERAND (stmt, 1);
      break;

    case COND_EXPR:
      rhs = TREE_OPERAND (stmt, 0);
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  code = TREE_CODE (rhs);

  /* Initial filter for operations we handle.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
    case NEGATE_EXPR:
    case CONJ_EXPR:
      if (TREE_CODE (type) != COMPLEX_TYPE)
	return;
      inner_type = TREE_TYPE (type);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      /* A comparison's own type is boolean; look at the operand's
	 type to tell whether this is a complex comparison.  */
      inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
      if (TREE_CODE (inner_type) != COMPLEX_TYPE)
	return;
      break;

    default:
      return;
    }

  /* Extract the components of the two complex values.  Make sure and
     handle the common case of the same value used twice specially.  */
  ac = TREE_OPERAND (rhs, 0);
  ar = extract_component (bsi, ac, 0);
  ai = extract_component (bsi, ac, 1);

  if (TREE_CODE_CLASS (code) == tcc_unary)
    bc = br = bi = NULL;
  else
    {
      bc = TREE_OPERAND (rhs, 1);
      if (ac == bc)
	br = ar, bi = ai;
      else
	{
	  br = extract_component (bsi, bc, 0);
	  bi = extract_component (bsi, bc, 1);
	}
    }

  /* Dispatch to the expander for this operation.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case MULT_EXPR:
      expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
      break;

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
      expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case NEGATE_EXPR:
      expand_complex_negation (bsi, inner_type, ar, ai);
      break;

    case CONJ_EXPR:
      expand_complex_conjugate (bsi, inner_type, ar, ai);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      expand_complex_comparison (bsi, ar, ai, br, bi, code);
      break;

    default:
      gcc_unreachable ();
    }
}
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  /* The inner type must fit into a HOST_WIDE_INT at least once.  */
  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      /* ~0 / mask is 0x0101..01 in WIDTH-bit chunks, so multiplying by
	 the low WIDTH bits of VALUE replicates them into every chunk.  */
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  /* Spread LOW over the low/high halves of the double-word constant
     according to TYPE's precision; other precisions are unsupported.  */
  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
512 /* Return a suitable vector types made of SUBPARTS units each of mode
513 "word_mode" (the global variable). */
514 static tree
515 build_word_mode_vector_type (int nunits)
517 static tree innertype;
518 static tree last;
519 static int last_nunits;
521 if (!innertype)
522 innertype = lang_hooks.types.type_for_mode (word_mode, 1);
523 else if (last_nunits == nunits)
524 return last;
526 /* We build a new type, but we canonicalize it nevertheless,
527 because it still saves some memory. */
528 last_nunits = nunits;
529 last = type_hash_canon (nunits, build_vector_type (innertype, nunits));
530 return last;
533 typedef tree (*elem_op_func) (block_stmt_iterator *,
534 tree, tree, tree, tree, tree, enum tree_code);
536 static inline tree
537 tree_vec_extract (block_stmt_iterator *bsi, tree type,
538 tree t, tree bitsize, tree bitpos)
540 if (bitpos)
541 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
542 else
543 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
546 static tree
547 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
548 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
549 enum tree_code code)
551 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
552 return gimplify_build1 (bsi, code, inner_type, a);
555 static tree
556 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
557 tree bitpos, tree bitsize, enum tree_code code)
559 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
560 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
561 return gimplify_build2 (bsi, code, inner_type, a, b);
564 /* Expand vector addition to scalars. This does bit twiddling
565 in order to increase parallelism:
567 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
568 (a ^ b) & 0x80808080
570 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
571 (a ^ ~b) & 0x80808080
573 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
575 This optimization should be done only if 4 vector items or more
576 fit into a word. */
577 static tree
578 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
579 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
580 enum tree_code code)
582 tree inner_type = TREE_TYPE (TREE_TYPE (a));
583 unsigned HOST_WIDE_INT max;
584 tree low_bits, high_bits, a_low, b_low, result_low, signs;
586 max = GET_MODE_MASK (TYPE_MODE (inner_type));
587 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
588 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
590 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
591 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
593 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
594 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
595 if (code == PLUS_EXPR)
596 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
597 else
599 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
600 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
603 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
604 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
605 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
608 static tree
609 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
610 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
611 tree bitsize ATTRIBUTE_UNUSED,
612 enum tree_code code ATTRIBUTE_UNUSED)
614 tree inner_type = TREE_TYPE (TREE_TYPE (b));
615 HOST_WIDE_INT max;
616 tree low_bits, high_bits, b_low, result_low, signs;
618 max = GET_MODE_MASK (TYPE_MODE (inner_type));
619 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
620 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
622 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
624 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
625 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
626 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
627 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
628 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  The results are
   collected into a CONSTRUCTOR of the original vector type.  */
static tree
expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  tree head, *chain = &head;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  /* Number of TYPE elements covered by one INNER_TYPE piece; greater
     than one when computing in a wider (e.g. word-sized) chunk.  */
  int delta = tree_low_cst (part_width, 1)
	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;

  /* Walk the vector in PART_WIDTH-bit steps, applying F to each piece
     and chaining the results onto the CONSTRUCTOR element list.  */
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
    {
      tree result = f (bsi, inner_type, a, b, index, part_width, code);
      *chain = tree_cons (NULL_TREE, result, NULL_TREE);
      chain = &TREE_CHAIN (*chain);
    }

  return build1 (CONSTRUCTOR, type, head);
}
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (bsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      /* Recast the vector as a vector of N_WORDS words and process it
	 word by word.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (bsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = gimplify_val (bsi, word_type, result);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
    }

  /* Reinterpret the computed value as the original vector type.  */
  return build1 (VIEW_CONVERT_EXPR, type, result);
}
696 /* Expand a vector operation to scalars; for integer types we can use
697 special bit twiddling tricks to do the sums a word at a time, using
698 function F_PARALLEL instead of F. These tricks are done only if
699 they can process at least four items, that is, only if the vector
700 holds at least four items and if a word can hold four items. */
701 static tree
702 expand_vector_addition (block_stmt_iterator *bsi,
703 elem_op_func f, elem_op_func f_parallel,
704 tree type, tree a, tree b, enum tree_code code)
706 int parts_per_word = UNITS_PER_WORD
707 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
709 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
710 && parts_per_word >= 4
711 && TYPE_VECTOR_SUBPARTS (type) >= 4)
712 return expand_vector_parallel (bsi, f_parallel,
713 type, a, b, code);
714 else
715 return expand_vector_piecewise (bsi, f,
716 type, TREE_TYPE (type),
717 a, b, code);
720 /* Return a type for the widest vector mode whose components are of mode
721 INNER_MODE, or NULL_TREE if none is found. */
722 static tree
723 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
725 enum machine_mode best_mode = VOIDmode, mode;
726 int best_nunits = 0;
728 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
729 mode = MIN_MODE_VECTOR_FLOAT;
730 else
731 mode = MIN_MODE_VECTOR_INT;
733 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
734 if (GET_MODE_INNER (mode) == inner_mode
735 && GET_MODE_NUNITS (mode) > best_nunits
736 && op->handlers[mode].insn_code != CODE_FOR_nothing)
737 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
739 if (best_mode == VOIDmode)
740 return NULL_TREE;
741 else
742 return lang_hooks.types.type_for_mode (best_mode, 1);
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree *p_rhs, rhs, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op;

  /* Find the RHS of an assignment, possibly wrapped in a RETURN_EXPR.
     P_RHS points at the operand slot so the expansion can be written
     back in place.  */
  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
	return;

      /* FALLTHRU */

    case MODIFY_EXPR:
      p_rhs = &TREE_OPERAND (stmt, 1);
      rhs = *p_rhs;
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  /* Only plain unary and binary arithmetic is handled here.  */
  code = TREE_CODE (rhs);
  if (TREE_CODE_CLASS (code) != tcc_unary
      && TREE_CODE_CLASS (code) != tcc_binary)
    return;

  if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);
  op = optab_for_tree_code (code, type);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (TYPE_MODE (type) == BLKmode && op)
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
      if (vector_compute_type != NULL_TREE)
	compute_type = vector_compute_type;
    }

  compute_mode = TYPE_MODE (compute_type);

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      /* If the hardware directly supports the operation on this vector
	 mode, leave the statement alone.  */
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
	  && op != NULL
	  && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
	return;
      else
	{
	  /* There is no operation in hardware, so fall back to scalars.  */
	  compute_type = TREE_TYPE (type);
	  compute_mode = TYPE_MODE (compute_type);
	}
    }

  /* If the compute mode is not a vector mode (hence we are decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may
     want to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* The word-at-a-time trick is unsafe for trapping arithmetic.  */
	if (TYPE_TRAP_SIGNED (type))
	  break;

	*p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
					 TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case NEGATE_EXPR:
	if (TYPE_TRAP_SIGNED (type))
	  break;

	*p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
					 TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Pure bitwise operations can always be done a word at a time.  */
	*p_rhs = expand_vector_parallel (bsi, do_binop, type,
					 TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      case BIT_NOT_EXPR:
	*p_rhs = expand_vector_parallel (bsi, do_unop, type,
					 TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
	return;

      default:
	break;
      }

  /* Generic fallback: expand one COMPUTE_TYPE piece at a time.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      NULL_TREE, code);
  else
    *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      TREE_OPERAND (rhs, 1), code);

  modify_stmt (bsi_stmt (*bsi));
}
887 static void
888 expand_vector_operations (void)
890 block_stmt_iterator bsi;
891 basic_block bb;
893 FOR_EACH_BB (bb)
895 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
896 expand_vector_operations_1 (&bsi);
/* Entry point of the pre-expand lowering pass: expand both complex and
   vector operations in every statement.  */
static void
tree_lower_operations (void)
{
  int old_last_basic_block = last_basic_block;
  block_stmt_iterator bsi;
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      /* Skip basic blocks created during this walk (for example by
	 expand_complex_div_wide splitting a block); they contain only
	 statements this pass emitted itself.  */
      if (bb->index >= old_last_basic_block)
	continue;
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  expand_complex_operations_1 (&bsi);
	  expand_vector_operations_1 (&bsi);
	}
    }
}
/* Pass descriptor: lower vector operations (expand_vector_operations)
   on the SSA representation.  */
struct tree_opt_pass pass_lower_vector_ssa =
{
  "vector",				/* name */
  NULL,					/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_rename_vars	/* todo_flags_finish */
    | TODO_ggc_collect | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow,
  0					/* letter */
};
/* Pass descriptor: lower both complex and vector operations
   (tree_lower_operations) before expansion.  */
struct tree_opt_pass pass_pre_expand =
{
  "oplower",				/* name */
  0,					/* gate */
  tree_lower_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect
    | TODO_verify_stmts,		/* todo_flags_finish */
  0					/* letter */
};