gcc/tree-complex.c

   1 /* Lower complex number and vector operations to scalar operations.
   2    Copyright (C) 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tree.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "expr.h"
  28 #include "insn-codes.h"
  29 #include "diagnostic.h"
  30 #include "optabs.h"
  31 #include "machmode.h"
  32 #include "langhooks.h"
  33 #include "tree-flow.h"
  34 #include "tree-gimple.h"
  35 #include "tree-iterator.h"
  36 #include "tree-pass.h"
  37 #include "flags.h"
  38 #include "ggc.h"
  39
  40
  41 /* Extract the real or imaginary part of a complex variable or constant.
  42    Make sure that it's a proper gimple_val and gimplify it if not.
  43    Emit any new code before BSI.  */
  44
  45 static tree
  46 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
  47 {
  48   tree ret, inner_type;
  49
  50   inner_type = TREE_TYPE (TREE_TYPE (t));
  51   switch (TREE_CODE (t))
  52     {
  53     case COMPLEX_CST:
  54       ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
  55       break;
  56
  57     case COMPLEX_EXPR:
  58       ret = TREE_OPERAND (t, imagpart_p);
  59       break;
  60
  61     case VAR_DECL:
  62     case PARM_DECL:
  63       ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
  64                     inner_type, t);
  65       break;
  66
  67     default:
  68       gcc_unreachable ();
  69     }
  70
  71   return gimplify_val (bsi, inner_type, ret);
  72 }
  73
  74 /* Update an assignment to a complex variable in place.  */
  75
  76 static void
  77 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
  78 {
  79   tree stmt = bsi_stmt (*bsi);
  80   tree type;
  81
  82   if (TREE_CODE (stmt) == RETURN_EXPR)
  83     stmt = TREE_OPERAND (stmt, 0);
  84
  85   type = TREE_TYPE (TREE_OPERAND (stmt, 1));
  86   TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
  87   modify_stmt (stmt);
  88 }
  89
  90 /* Expand complex addition to scalars:
  91         a + b = (ar + br) + i(ai + bi)
  92         a - b = (ar - br) + i(ai + bi)
  93 */
  94
  95 static void
  96 expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
  97                          tree ar, tree ai, tree br, tree bi,
  98                          enum tree_code code)
  99 {
 100   tree rr, ri;
 101
 102   rr = gimplify_build2 (bsi, code, inner_type, ar, br);
 103   ri = gimplify_build2 (bsi, code, inner_type, ai, bi);
 104
 105   update_complex_assignment (bsi, rr, ri);
 106 }
 107
 108 /* Expand a complex multiplication or division to a libcall to the c99
 109    compliant routines.  */
 110
 111 static void
 112 expand_complex_libcall (block_stmt_iterator *bsi, tree ar, tree ai,
 113                         tree br, tree bi, enum tree_code code)
 114 {
 115   enum machine_mode mode;
 116   enum built_in_function bcode;
 117   tree args, fn, stmt, type;
 118
 119   args = tree_cons (NULL, bi, NULL);
 120   args = tree_cons (NULL, br, args);
 121   args = tree_cons (NULL, ai, args);
 122   args = tree_cons (NULL, ar, args);
 123
 124   stmt = bsi_stmt (*bsi);
 125   type = TREE_TYPE (TREE_OPERAND (stmt, 1));
 126
 127   mode = TYPE_MODE (type);
 128   gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
 129   if (code == MULT_EXPR)
 130     bcode = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
 131   else if (code == RDIV_EXPR)
 132     bcode = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
 133   else
 134     gcc_unreachable ();
 135   fn = built_in_decls[bcode];
 136
 137   TREE_OPERAND (stmt, 1)
 138     = build3 (CALL_EXPR, type, build_fold_addr_expr (fn), args, NULL);
 139   modify_stmt (stmt);
 140 }
 141
 142 /* Expand complex multiplication to scalars:
 143         a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
 144 */
 145
 146 static void
 147 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
 148                                tree ar, tree ai, tree br, tree bi)
 149 {
 150   tree t1, t2, t3, t4, rr, ri;
 151
 152   if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type))
 153     {
 154       expand_complex_libcall (bsi, ar, ai, br, bi, MULT_EXPR);
 155       return;
 156     }
 157
 158   t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
 159   t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
 160   t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
 161
 162   /* Avoid expanding redundant multiplication for the common
 163      case of squaring a complex number.  */
 164   if (ar == br && ai == bi)
 165     t4 = t3;
 166   else
 167     t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
 168
 169   rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
 170   ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
 171
 172   update_complex_assignment (bsi, rr, ri);
 173 }
 174
 175 /* Expand complex division to scalars, straightforward algorithm.
 176         a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
 177             t = br*br + bi*bi
 178 */
 179
 180 static void
 181 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
 182                              tree ar, tree ai, tree br, tree bi,
 183                              enum tree_code code)
 184 {
 185   tree rr, ri, div, t1, t2, t3;
 186
 187   t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
 188   t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
 189   div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
 190
 191   t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
 192   t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
 193   t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
 194   rr = gimplify_build2 (bsi, code, inner_type, t3, div);
 195
 196   t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
 197   t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
 198   t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
 199   ri = gimplify_build2 (bsi, code, inner_type, t3, div);
 200
 201   update_complex_assignment (bsi, rr, ri);
 202 }
 203
 204 /* Expand complex division to scalars, modified algorithm to minimize
 205    overflow with wide input ranges.  */
 206
 207 static void
 208 expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
 209                          tree ar, tree ai, tree br, tree bi,
 210                          enum tree_code code)
 211 {
 212   tree rr, ri, ratio, div, t1, t2, tr, ti, cond;
 213   basic_block bb_cond, bb_true, bb_false, bb_join;
 214
 215   /* Examine |br| < |bi|, and branch.  */
 216   t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
 217   t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
 218   cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
 219   STRIP_NOPS (cond);
 220
 221   bb_cond = bb_true = bb_false = bb_join = NULL;
 222   rr = ri = tr = ti = NULL;
 223   if (!TREE_CONSTANT (cond))
 224     {
 225       edge e;
 226
 227       cond = build (COND_EXPR, void_type_node, cond, NULL, NULL);
 228       bsi_insert_before (bsi, cond, BSI_SAME_STMT);
 229
 230       /* Split the original block, and create the TRUE and FALSE blocks.  */
 231       e = split_block (bsi->bb, cond);
 232       bb_cond = e->src;
 233       bb_join = e->dest;
 234       bb_true = create_empty_bb (bb_cond);
 235       bb_false = create_empty_bb (bb_true);
 236
 237       t1 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_true));
 238       t2 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_false));
 239       COND_EXPR_THEN (cond) = t1;
 240       COND_EXPR_ELSE (cond) = t2;
 241
 242       /* Wire the blocks together.  */
 243       e->flags = EDGE_TRUE_VALUE;
 244       redirect_edge_succ (e, bb_true);
 245       make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
 246       make_edge (bb_true, bb_join, EDGE_FALLTHRU);
 247       make_edge (bb_false, bb_join, EDGE_FALLTHRU);
 248
 249       /* Update dominance info.  Note that bb_join's data was
 250          updated by split_block.  */
 251       if (dom_info_available_p (CDI_DOMINATORS))
 252         {
 253           set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
 254           set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
 255         }
 256
 257       rr = make_rename_temp (inner_type, NULL);
 258       ri = make_rename_temp (inner_type, NULL);
 259     }
 260
 261   /* In the TRUE branch, we compute
 262       ratio = br/bi;
 263       div = (br * ratio) + bi;
 264       tr = (ar * ratio) + ai;
 265       ti = (ai * ratio) - ar;
 266       tr = tr / div;
 267       ti = ti / div;  */
 268   if (bb_true || integer_nonzerop (cond))
 269     {
 270       if (bb_true)
 271         {
 272           *bsi = bsi_last (bb_true);
 273           bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
 274         }
 275
 276       ratio = gimplify_build2 (bsi, code, inner_type, br, bi);
 277
 278       t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, ratio);
 279       div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, bi);
 280
 281       t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
 282       tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ai);
 283
 284       t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
 285       ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, ar);
 286
 287       tr = gimplify_build2 (bsi, code, inner_type, tr, div);
 288       ti = gimplify_build2 (bsi, code, inner_type, ti, div);
 289
 290      if (bb_true)
 291        {
 292          t1 = build (MODIFY_EXPR, inner_type, rr, tr);
 293          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
 294          t1 = build (MODIFY_EXPR, inner_type, ri, ti);
 295          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
 296          bsi_remove (bsi);
 297        }
 298     }
 299
 300   /* In the FALSE branch, we compute
 301       ratio = d/c;
 302       divisor = (d * ratio) + c;
 303       tr = (b * ratio) + a;
 304       ti = b - (a * ratio);
 305       tr = tr / div;
 306       ti = ti / div;  */
 307   if (bb_false || integer_zerop (cond))
 308     {
 309       if (bb_false)
 310         {
 311           *bsi = bsi_last (bb_false);
 312           bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
 313         }
 314
 315       ratio = gimplify_build2 (bsi, code, inner_type, bi, br);
 316
 317       t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, ratio);
 318       div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, br);
 319
 320       t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
 321       tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ar);
 322
 323       t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
 324       ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
 325
 326       tr = gimplify_build2 (bsi, code, inner_type, tr, div);
 327       ti = gimplify_build2 (bsi, code, inner_type, ti, div);
 328
 329      if (bb_false)
 330        {
 331          t1 = build (MODIFY_EXPR, inner_type, rr, tr);
 332          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
 333          t1 = build (MODIFY_EXPR, inner_type, ri, ti);
 334          bsi_insert_before (bsi, t1, BSI_SAME_STMT);
 335          bsi_remove (bsi);
 336        }
 337     }
 338
 339   if (bb_join)
 340     *bsi = bsi_start (bb_join);
 341   else
 342     rr = tr, ri = ti;
 343
 344   update_complex_assignment (bsi, rr, ri);
 345 }
 346
 347 /* Expand complex division to scalars.  */
 348
 349 static void
 350 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
 351                          tree ar, tree ai, tree br, tree bi,
 352                          enum tree_code code)
 353 {
 354   switch (flag_complex_method)
 355     {
 356     case 0:
 357       /* straightforward implementation of complex divide acceptable.  */
 358       expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
 359       break;
 360
 361     case 2:
 362       if (SCALAR_FLOAT_TYPE_P (inner_type))
 363         {
 364           expand_complex_libcall (bsi, ar, ai, br, bi, code);
 365           return;
 366         }
 367       /* FALLTHRU */
 368
 369     case 1:
 370       /* wide ranges of inputs must work for complex divide.  */
 371       expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
 372       break;
 373
 374     default:
 375       gcc_unreachable ();
 376     }
 377 }
 378
 379 /* Expand complex negation to scalars:
 380         -a = (-ar) + i(-ai)
 381 */
 382
 383 static void
 384 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
 385                          tree ar, tree ai)
 386 {
 387   tree rr, ri;
 388
 389   rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
 390   ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
 391
 392   update_complex_assignment (bsi, rr, ri);
 393 }
 394
 395 /* Expand complex conjugate to scalars:
 396         ~a = (ar) + i(-ai)
 397 */
 398
 399 static void
 400 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
 401                           tree ar, tree ai)
 402 {
 403   tree ri;
 404
 405   ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
 406
 407   update_complex_assignment (bsi, ar, ri);
 408 }
 409
 410 /* Expand complex comparison (EQ or NE only).  */
 411
 412 static void
 413 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
 414                            tree br, tree bi, enum tree_code code)
 415 {
 416   tree cr, ci, cc, stmt, expr, type;
 417
 418   cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
 419   ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
 420   cc = gimplify_build2 (bsi,
 421                         (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
 422                         boolean_type_node, cr, ci);
 423
 424   stmt = expr = bsi_stmt (*bsi);
 425
 426   switch (TREE_CODE (stmt))
 427     {
 428     case RETURN_EXPR:
 429       expr = TREE_OPERAND (stmt, 0);
 430       /* FALLTHRU */
 431     case MODIFY_EXPR:
 432       type = TREE_TYPE (TREE_OPERAND (expr, 1));
 433       TREE_OPERAND (expr, 1) = fold_convert (type, cc);
 434       break;
 435     case COND_EXPR:
 436       TREE_OPERAND (stmt, 0) = cc;
 437       break;
 438     default:
 439       gcc_unreachable ();
 440     }
 441
 442   modify_stmt (stmt);
 443 }
 444
 445 /* Process one statement.  If we identify a complex operation, expand it.  */
 446
 447 static void
 448 expand_complex_operations_1 (block_stmt_iterator *bsi)
 449 {
 450   tree stmt = bsi_stmt (*bsi);
 451   tree rhs, type, inner_type;
 452   tree ac, ar, ai, bc, br, bi;
 453   enum tree_code code;
 454
 455   switch (TREE_CODE (stmt))
 456     {
 457     case RETURN_EXPR:
 458       stmt = TREE_OPERAND (stmt, 0);
 459       if (!stmt)
 460         return;
 461       if (TREE_CODE (stmt) != MODIFY_EXPR)
 462         return;
 463       /* FALLTHRU */
 464
 465     case MODIFY_EXPR:
 466       rhs = TREE_OPERAND (stmt, 1);
 467       break;
 468
 469     case COND_EXPR:
 470       rhs = TREE_OPERAND (stmt, 0);
 471       break;
 472
 473     default:
 474       return;
 475     }
 476
 477   type = TREE_TYPE (rhs);
 478   code = TREE_CODE (rhs);
 479
 480   /* Initial filter for operations we handle.  */
 481   switch (code)
 482     {
 483     case PLUS_EXPR:
 484     case MINUS_EXPR:
 485     case MULT_EXPR:
 486     case TRUNC_DIV_EXPR:
 487     case CEIL_DIV_EXPR:
 488     case FLOOR_DIV_EXPR:
 489     case ROUND_DIV_EXPR:
 490     case RDIV_EXPR:
 491     case NEGATE_EXPR:
 492     case CONJ_EXPR:
 493       if (TREE_CODE (type) != COMPLEX_TYPE)
 494         return;
 495       inner_type = TREE_TYPE (type);
 496       break;
 497
 498     case EQ_EXPR:
 499     case NE_EXPR:
 500       inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
 501       if (TREE_CODE (inner_type) != COMPLEX_TYPE)
 502         return;
 503       break;
 504
 505     default:
 506       return;
 507     }
 508
 509   /* Extract the components of the two complex values.  Make sure and
 510      handle the common case of the same value used twice specially.  */
 511   ac = TREE_OPERAND (rhs, 0);
 512   ar = extract_component (bsi, ac, 0);
 513   ai = extract_component (bsi, ac, 1);
 514
 515   if (TREE_CODE_CLASS (code) == tcc_unary)
 516     bc = br = bi = NULL;
 517   else
 518     {
 519       bc = TREE_OPERAND (rhs, 1);
 520       if (ac == bc)
 521         br = ar, bi = ai;
 522       else
 523         {
 524           br = extract_component (bsi, bc, 0);
 525           bi = extract_component (bsi, bc, 1);
 526         }
 527     }
 528
 529   switch (code)
 530     {
 531     case PLUS_EXPR:
 532     case MINUS_EXPR:
 533       expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
 534       break;
 535
 536     case MULT_EXPR:
 537       expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
 538       break;
 539
 540     case TRUNC_DIV_EXPR:
 541     case CEIL_DIV_EXPR:
 542     case FLOOR_DIV_EXPR:
 543     case ROUND_DIV_EXPR:
 544     case RDIV_EXPR:
 545       expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
 546       break;
 547
 548     case NEGATE_EXPR:
 549       expand_complex_negation (bsi, inner_type, ar, ai);
 550       break;
 551
 552     case CONJ_EXPR:
 553       expand_complex_conjugate (bsi, inner_type, ar, ai);
 554       break;
 555
 556     case EQ_EXPR:
 557     case NE_EXPR:
 558       expand_complex_comparison (bsi, ar, ai, br, bi, code);
 559       break;
 560
 561     default:
 562       gcc_unreachable ();
 563     }
 564 }
 565 \f
 566 /* Build a constant of type TYPE, made of VALUE's bits replicated
 567    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
 568 static tree
 569 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
 570 {
 571   int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
 572   int n = HOST_BITS_PER_WIDE_INT / width;
 573   unsigned HOST_WIDE_INT low, high, mask;
 574   tree ret;
 575
 576   gcc_assert (n);
 577
 578   if (width == HOST_BITS_PER_WIDE_INT)
 579     low = value;
 580   else
 581     {
 582       mask = ((HOST_WIDE_INT)1 << width) - 1;
 583       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
 584     }
 585
 586   if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
 587     low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
 588   else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
 589     high = 0;
 590   else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
 591     high = low;
 592   else
 593     gcc_unreachable ();
 594
 595   ret = build_int_cst_wide (type, low, high);
 596   return ret;
 597 }
 598
 599 static GTY(()) tree vector_inner_type;
 600 static GTY(()) tree vector_last_type;
 601 static GTY(()) int vector_last_nunits;
 602
 603 /* Return a suitable vector types made of SUBPARTS units each of mode
 604    "word_mode" (the global variable).  */
 605 static tree
 606 build_word_mode_vector_type (int nunits)
 607 {
 608   if (!vector_inner_type)
 609     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
 610   else if (vector_last_nunits == nunits)
 611     {
 612       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
 613       return vector_last_type;
 614     }
 615
 616   /* We build a new type, but we canonicalize it nevertheless,
 617      because it still saves some memory.  */
 618   vector_last_nunits = nunits;
 619   vector_last_type = type_hash_canon (nunits,
 620                                       build_vector_type (vector_inner_type,
 621                                                          nunits));
 622   return vector_last_type;
 623 }
 624
 625 typedef tree (*elem_op_func) (block_stmt_iterator *,
 626                               tree, tree, tree, tree, tree, enum tree_code);
 627
 628 static inline tree
 629 tree_vec_extract (block_stmt_iterator *bsi, tree type,
 630                   tree t, tree bitsize, tree bitpos)
 631 {
 632   if (bitpos)
 633     return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
 634   else
 635     return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
 636 }
 637
 638 static tree
 639 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
 640          tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
 641          enum tree_code code)
 642 {
 643   a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
 644   return gimplify_build1 (bsi, code, inner_type, a);
 645 }
 646
 647 static tree
 648 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
 649           tree bitpos, tree bitsize, enum tree_code code)
 650 {
 651   a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
 652   b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
 653   return gimplify_build2 (bsi, code, inner_type, a, b);
 654 }
 655
 656 /* Expand vector addition to scalars.  This does bit twiddling
 657    in order to increase parallelism:
 658
 659    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
 660            (a ^ b) & 0x80808080
 661
 662    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
 663             (a ^ ~b) & 0x80808080
 664
 665    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
 666
 667    This optimization should be done only if 4 vector items or more
 668    fit into a word.  */
 669 static tree
 670 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
 671                tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
 672                enum tree_code code)
 673 {
 674   tree inner_type = TREE_TYPE (TREE_TYPE (a));
 675   unsigned HOST_WIDE_INT max;
 676   tree low_bits, high_bits, a_low, b_low, result_low, signs;
 677
 678   max = GET_MODE_MASK (TYPE_MODE (inner_type));
 679   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
 680   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
 681
 682   a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
 683   b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
 684
 685   signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
 686   b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
 687   if (code == PLUS_EXPR)
 688     a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
 689   else
 690     {
 691       a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
 692       signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
 693     }
 694
 695   signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
 696   result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
 697   return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
 698 }
 699
 700 static tree
 701 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
 702            tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
 703            tree bitsize ATTRIBUTE_UNUSED,
 704            enum tree_code code ATTRIBUTE_UNUSED)
 705 {
 706   tree inner_type = TREE_TYPE (TREE_TYPE (b));
 707   HOST_WIDE_INT max;
 708   tree low_bits, high_bits, b_low, result_low, signs;
 709
 710   max = GET_MODE_MASK (TYPE_MODE (inner_type));
 711   low_bits = build_replicated_const (word_type, inner_type, max >> 1);
 712   high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
 713
 714   b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
 715
 716   b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
 717   signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
 718   signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
 719   result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
 720   return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
 721 }
 722
 723 /* Expand a vector operation to scalars, by using many operations
 724    whose type is the vector type's inner type.  */
 725 static tree
 726 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
 727                          tree type, tree inner_type,
 728                          tree a, tree b, enum tree_code code)
 729 {
 730   tree head, *chain = &head;
 731   tree part_width = TYPE_SIZE (inner_type);
 732   tree index = bitsize_int (0);
 733   int nunits = TYPE_VECTOR_SUBPARTS (type);
 734   int delta = tree_low_cst (part_width, 1)
 735               / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
 736   int i;
 737
 738   for (i = 0; i < nunits;
 739        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
 740     {
 741       tree result = f (bsi, inner_type, a, b, index, part_width, code);
 742       *chain = tree_cons (NULL_TREE, result, NULL_TREE);
 743       chain = &TREE_CHAIN (*chain);
 744     }
 745
 746   return build1 (CONSTRUCTOR, type, head);
 747 }
 748
 749 /* Expand a vector operation to scalars with the freedom to use
 750    a scalar integer type, or to use a different size for the items
 751    in the vector type.  */
 752 static tree
 753 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
 754                         tree a, tree b,
 755                         enum tree_code code)
 756 {
 757   tree result, compute_type;
 758   enum machine_mode mode;
 759   int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
 760
 761   /* We have three strategies.  If the type is already correct, just do
 762      the operation an element at a time.  Else, if the vector is wider than
 763      one word, do it a word at a time; finally, if the vector is smaller
 764      than one word, do it as a scalar.  */
 765   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
 766      return expand_vector_piecewise (bsi, f,
 767                                      type, TREE_TYPE (type),
 768                                      a, b, code);
 769   else if (n_words > 1)
 770     {
 771       tree word_type = build_word_mode_vector_type (n_words);
 772       result = expand_vector_piecewise (bsi, f,
 773                                         word_type, TREE_TYPE (word_type),
 774                                         a, b, code);
 775       result = gimplify_val (bsi, word_type, result);
 776     }
 777   else
 778     {
 779       /* Use a single scalar operation with a mode no wider than word_mode.  */
 780       mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
 781       compute_type = lang_hooks.types.type_for_mode (mode, 1);
 782       result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
 783     }
 784
 785   return build1 (VIEW_CONVERT_EXPR, type, result);
 786 }
 787
 788 /* Expand a vector operation to scalars; for integer types we can use
 789    special bit twiddling tricks to do the sums a word at a time, using
 790    function F_PARALLEL instead of F.  These tricks are done only if
 791    they can process at least four items, that is, only if the vector
 792    holds at least four items and if a word can hold four items.  */
 793 static tree
 794 expand_vector_addition (block_stmt_iterator *bsi,
 795                         elem_op_func f, elem_op_func f_parallel,
 796                         tree type, tree a, tree b, enum tree_code code)
 797 {
 798   int parts_per_word = UNITS_PER_WORD
 799                        / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
 800
 801   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
 802       && parts_per_word >= 4
 803       && TYPE_VECTOR_SUBPARTS (type) >= 4)
 804     return expand_vector_parallel (bsi, f_parallel,
 805                                    type, a, b, code);
 806   else
 807     return expand_vector_piecewise (bsi, f,
 808                                     type, TREE_TYPE (type),
 809                                     a, b, code);
 810 }
 811
 812 /* Return a type for the widest vector mode whose components are of mode
 813    INNER_MODE, or NULL_TREE if none is found.  */
 814 static tree
 815 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
 816 {
 817   enum machine_mode best_mode = VOIDmode, mode;
 818   int best_nunits = 0;
 819
 820   if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
 821     mode = MIN_MODE_VECTOR_FLOAT;
 822   else
 823     mode = MIN_MODE_VECTOR_INT;
 824
 825   for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
 826     if (GET_MODE_INNER (mode) == inner_mode
 827         && GET_MODE_NUNITS (mode) > best_nunits
 828         && op->handlers[mode].insn_code != CODE_FOR_nothing)
 829       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
 830
 831   if (best_mode == VOIDmode)
 832     return NULL_TREE;
 833   else
 834     return lang_hooks.types.type_for_mode (best_mode, 1);
 835 }
 836
 837 /* Process one statement.  If we identify a vector operation, expand it.  */
 838
 839 static void
 840 expand_vector_operations_1 (block_stmt_iterator *bsi)
 841 {
 842   tree stmt = bsi_stmt (*bsi);
 843   tree *p_rhs, rhs, type, compute_type;
 844   enum tree_code code;
 845   enum machine_mode compute_mode;
 846   optab op;
 847
 848   switch (TREE_CODE (stmt))
 849     {
 850     case RETURN_EXPR:
 851       stmt = TREE_OPERAND (stmt, 0);
 852       if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
 853         return;
 854
 855       /* FALLTHRU */
 856
 857     case MODIFY_EXPR:
 858       p_rhs = &TREE_OPERAND (stmt, 1);
 859       rhs = *p_rhs;
 860       break;
 861
 862     default:
 863       return;
 864     }
 865
 866   type = TREE_TYPE (rhs);
 867   if (TREE_CODE (type) != VECTOR_TYPE)
 868     return;
 869
 870   code = TREE_CODE (rhs);
 871   if (TREE_CODE_CLASS (code) != tcc_unary
 872       && TREE_CODE_CLASS (code) != tcc_binary)
 873     return;
 874
 875   if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
 876     return;
 877
 878   gcc_assert (code != CONVERT_EXPR);
 879   op = optab_for_tree_code (code, type);
 880
 881   /* Optabs will try converting a negation into a subtraction, so
 882      look for it as well.  TODO: negation of floating-point vectors
 883      might be turned into an exclusive OR toggling the sign bit.  */
 884   if (op == NULL
 885       && code == NEGATE_EXPR
 886       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
 887     op = optab_for_tree_code (MINUS_EXPR, type);
 888
 889   /* For very wide vectors, try using a smaller vector mode.  */
 890   compute_type = type;
 891   if (TYPE_MODE (type) == BLKmode && op)
 892     {
 893       tree vector_compute_type
 894         = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
 895       if (vector_compute_type != NULL_TREE)
 896         compute_type = vector_compute_type;
 897     }
 898
 899   compute_mode = TYPE_MODE (compute_type);
 900
 901   /* If we are breaking a BLKmode vector into smaller pieces,
 902      type_for_widest_vector_mode has already looked into the optab,
 903      so skip these checks.  */
 904   if (compute_type == type)
 905     {
 906       if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
 907            || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
 908           && op != NULL
 909           && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
 910         return;
 911       else
 912         {
 913           /* There is no operation in hardware, so fall back to scalars.  */
 914           compute_type = TREE_TYPE (type);
 915           compute_mode = TYPE_MODE (compute_type);
 916         }
 917     }
 918
 919   /* If the compute mode is not a vector mode (hence we are decomposing
 920      a BLKmode vector to smaller, hardware-supported vectors), we may
 921      want to expand the operations in parallel.  */
 922   if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
 923       && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
 924     switch (code)
 925       {
 926       case PLUS_EXPR:
 927       case MINUS_EXPR:
 928         if (TYPE_TRAP_SIGNED (type))
 929           break;
 930
 931         *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
 932                                          TREE_OPERAND (rhs, 0),
 933                                          TREE_OPERAND (rhs, 1), code);
 934         modify_stmt (bsi_stmt (*bsi));
 935         return;
 936
 937       case NEGATE_EXPR:
 938         if (TYPE_TRAP_SIGNED (type))
 939           break;
 940
 941         *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
 942                                          TREE_OPERAND (rhs, 0),
 943                                          NULL_TREE, code);
 944         modify_stmt (bsi_stmt (*bsi));
 945         return;
 946
 947       case BIT_AND_EXPR:
 948       case BIT_IOR_EXPR:
 949       case BIT_XOR_EXPR:
 950         *p_rhs = expand_vector_parallel (bsi, do_binop, type,
 951                                          TREE_OPERAND (rhs, 0),
 952                                          TREE_OPERAND (rhs, 1), code);
 953         modify_stmt (bsi_stmt (*bsi));
 954         return;
 955
 956       case BIT_NOT_EXPR:
 957         *p_rhs = expand_vector_parallel (bsi, do_unop, type,
 958                                          TREE_OPERAND (rhs, 0),
 959                                          NULL_TREE, code);
 960         modify_stmt (bsi_stmt (*bsi));
 961         return;
 962
 963       default:
 964         break;
 965       }
 966
 967   if (TREE_CODE_CLASS (code) == tcc_unary)
 968     *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
 969                                       TREE_OPERAND (rhs, 0),
 970                                       NULL_TREE, code);
 971   else
 972     *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
 973                                       TREE_OPERAND (rhs, 0),
 974                                       TREE_OPERAND (rhs, 1), code);
 975
 976   modify_stmt (bsi_stmt (*bsi));
 977 }
 978 \f
 979 static void
 980 expand_vector_operations (void)
 981 {
 982   block_stmt_iterator bsi;
 983   basic_block bb;
 984
 985   FOR_EACH_BB (bb)
 986     {
 987       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 988         expand_vector_operations_1 (&bsi);
 989     }
 990 }
 991
 992 static void
 993 tree_lower_operations (void)
 994 {
 995   int old_last_basic_block = last_basic_block;
 996   block_stmt_iterator bsi;
 997   basic_block bb;
 998
 999   FOR_EACH_BB (bb)
1000     {
1001       if (bb->index >= old_last_basic_block)
1002         continue;
1003       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1004         {
1005           expand_complex_operations_1 (&bsi);
1006           expand_vector_operations_1 (&bsi);
1007         }
1008     }
1009 }
1010
1011
1012 struct tree_opt_pass pass_lower_vector_ssa =
1013 {
1014   "vector",                             /* name */
1015   NULL,                                 /* gate */
1016   expand_vector_operations,             /* execute */
1017   NULL,                                 /* sub */
1018   NULL,                                 /* next */
1019   0,                                    /* static_pass_number */
1020   0,                                    /* tv_id */
1021   PROP_cfg,                             /* properties_required */
1022   0,                                    /* properties_provided */
1023   0,                                    /* properties_destroyed */
1024   0,                                    /* todo_flags_start */
1025   TODO_dump_func | TODO_rename_vars     /* todo_flags_finish */
1026     | TODO_ggc_collect | TODO_verify_ssa
1027     | TODO_verify_stmts | TODO_verify_flow,
1028   0                                     /* letter */
1029 };
1030
1031 struct tree_opt_pass pass_pre_expand =
1032 {
1033   "oplower",                            /* name */
1034   0,                                    /* gate */
1035   tree_lower_operations,                /* execute */
1036   NULL,                                 /* sub */
1037   NULL,                                 /* next */
1038   0,                                    /* static_pass_number */
1039   0,                                    /* tv_id */
1040   PROP_cfg,                             /* properties_required */
1041   0,                                    /* properties_provided */
1042   0,                                    /* properties_destroyed */
1043   0,                                    /* todo_flags_start */
1044   TODO_dump_func | TODO_ggc_collect
1045     | TODO_verify_stmts,                /* todo_flags_finish */
1046   0                                     /* letter */
1047 };
1048
1049 #include "gt-tree-complex.h"