gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Iterator for the scalar (DF) and vector (V4SF, V2DF) floating point modes
     ;; supported by VSX
  22 (define_mode_iterator VSX_B [DF V4SF V2DF])
  23
  24 ;; Iterator for the 2 vector types with 64-bit elements (V2DF and V2DI)
  25 (define_mode_iterator VSX_D [V2DF V2DI])
  26
  27 ;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
  28 ;; lxvd2x to properly handle swapping words on little endian.
     ;; TI is only part of the iterator when VECTOR_MEM_VSX_P (TImode) holds.
     ;; These modes are matched by the 2-element *vsx_le_perm_load_<mode> and
     ;; *vsx_le_perm_store_<mode> patterns.
  29 (define_mode_iterator VSX_LE [V2DF
  30                               V2DI
  31                               V1TI
  32                               (TI       "VECTOR_MEM_VSX_P (TImode)")])
  33
  34 ;; Iterator for the 2 vector types with 32-bit elements (V4SF and V4SI).
     ;; Matched by the 4-element *vsx_le_perm_load_<mode> and
     ;; *vsx_le_perm_store_<mode> patterns.
  35 (define_mode_iterator VSX_W [V4SF V4SI])
  36
  37 ;; Iterator for the double precision floating point modes: the vector mode
     ;; V2DF and the scalar mode DF
  38 (define_mode_iterator VSX_DF [V2DF DF])
  39
  40 ;; Iterator for vector floating point types supported by VSX.
     ;; Used by the vector arithmetic patterns (*vsx_add<mode>3, *vsx_sub<mode>3,
     ;; *vsx_mul<mode>3, *vsx_div<mode>3).
  41 (define_mode_iterator VSX_F [V4SF V2DF])
  42
  43 ;; Iterator for logical types supported by VSX: every vector mode listed in
     ;; VSX_M plus TI (unconditionally)
  44 (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
  45
  46 ;; Iterator for memory move.  Handle TImode specially to allow
  47 ;; it to use gprs as well as vsx registers.
     ;; TI is therefore not listed here: VSX_M drives *vsx_mov<mode> and the
     ;; vsx_load_<mode>/vsx_store_<mode> expanders, while TImode moves have their
     ;; own *vsx_movti_64bit and *vsx_movti_32bit patterns.
  48 (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
  49
     ;; Same modes as VSX_M, plus TI when TARGET_VSX_TIMODE is true.
  50 (define_mode_iterator VSX_M2 [V16QI
  51                               V8HI
  52                               V4SI
  53                               V2DI
  54                               V4SF
  55                               V2DF
  56                               V1TI
  57                               (TI       "TARGET_VSX_TIMODE")])
  58
  59 ;; Map into the appropriate load/store name based on the type.
     ;; Modes with 8-, 16- and 32-bit elements map to "vw4"; V2DF, V2DI, V1TI and
     ;; TI map to "vd2"; scalar DF maps to "d".
     ;; NOTE(review): no user of <VSm> is visible in this part of the file; the
     ;; values look like the layout part of a load/store mnemonic -- confirm
     ;; against the patterns that reference it.
  60 (define_mode_attr VSm  [(V16QI "vw4")
  61                         (V8HI  "vw4")
  62                         (V4SI  "vw4")
  63                         (V4SF  "vw4")
  64                         (V2DF  "vd2")
  65                         (V2DI  "vd2")
  66                         (DF    "d")
  67                         (V1TI  "vd2")
  68                         (TI    "vd2")])
  69
  70 ;; Map into the appropriate suffix based on the type.
     ;; The suffix is appended to instruction mnemonics in the output templates,
     ;; e.g. "xvadd<VSs>" in *vsx_add<mode>3 becomes xvaddsp for V4SF and
     ;; xvadddp for V2DF.
  71 (define_mode_attr VSs   [(V16QI "sp")
  72                          (V8HI  "sp")
  73                          (V4SI  "sp")
  74                          (V4SF  "sp")
  75                          (V2DF  "dp")
  76                          (V2DI  "dp")
  77                          (DF    "dp")
  78                          (SF    "sp")
  79                          (V1TI  "dp")
  80                          (TI    "dp")])
  81
  82 ;; Map the register class used.
     ;; Each value is a constraint string.  <VSr> is the constraint of the first
     ;; (preferred) alternative in patterns such as *vsx_add<mode>3 and
     ;; *vsx_mov<mode>; see VSa for the fallback constraint.
  83 (define_mode_attr VSr   [(V16QI "v")
  84                          (V8HI  "v")
  85                          (V4SI  "v")
  86                          (V4SF  "wf")
  87                          (V2DI  "wd")
  88                          (V2DF  "wd")
  89                          (DI    "wi")
  90                          (DF    "ws")
  91                          (SF    "ww")
  92                          (V1TI  "v")
  93                          (TI    "wt")])
  94
  95 ;; Map the register class used for float<->int conversions (floating point side)
  96 ;; VSr2 is the preferred register class, VSr3 is any register class that will
  97 ;; hold the data
  98 (define_mode_attr VSr2  [(V2DF  "wd")
  99                          (V4SF  "wf")
 100                          (DF    "ws")
 101                          (SF    "ww")
 102                          (DI    "wi")])
 103
     ;; VSr3 differs from VSr2 only for the vector modes V2DF and V4SF, where it
     ;; uses "wa" (the constraint VSa gives for every vector mode).
 104 (define_mode_attr VSr3  [(V2DF  "wa")
 105                          (V4SF  "wa")
 106                          (DF    "ws")
 107                          (SF    "ww")
 108                          (DI    "wi")])
 109
 110 ;; Map the register class for sp<->dp float conversions, destination.
     ;; Each value is a constraint string; see VSr5 for the matching source side.
 111 (define_mode_attr VSr4  [(SF    "ws")
 112                          (DF    "f")
 113                          (V2DF  "wd")
 114                          (V4SF  "v")])
 115
 116 ;; Map the register class for sp<->dp float conversions, source.
     ;; The scalar entries equal those of VSr4; the V2DF and V4SF entries are the
     ;; mirror image of VSr4's.
     ;; NOTE(review): presumably mirrored because the source of a conversion has
     ;; the other precision than the mode used as key -- confirm against users.
 117 (define_mode_attr VSr5  [(SF    "ws")
 118                          (DF    "f")
 119                          (V2DF  "v")
 120                          (V4SF  "wd")])
 121
 122 ;; The VSX register class that a type can occupy, even if it is not the
 123 ;; preferred register class (VSr is the preferred register class that will get
 124 ;; allocated first).
     ;; <VSa> is the constraint of the '?'-disparaged second alternative in
     ;; patterns such as *vsx_add<mode>3, and the only register constraint of the
     ;; iterator-based *vsx_le_perm_load_<mode>/*vsx_le_perm_store_<mode> patterns.
 125 (define_mode_attr VSa   [(V16QI "wa")
 126                          (V8HI  "wa")
 127                          (V4SI  "wa")
 128                          (V4SF  "wa")
 129                          (V2DI  "wa")
 130                          (V2DF  "wa")
 131                          (DI    "wi")
 132                          (DF    "ws")
 133                          (SF    "ww")
 134                          (V1TI  "wa")
 135                          (TI    "wt")])
 136
 137 ;; Same size integer type for floating point data.
     ;; VSi gives the lower-case spelling and VSI the upper-case spelling of the
     ;; same integer mode.
 138 (define_mode_attr VSi [(V4SF  "v4si")
 139                        (V2DF  "v2di")
 140                        (DF    "di")])
 141
 142 (define_mode_attr VSI [(V4SF  "V4SI")
 143                        (V2DF  "V2DI")
 144                        (DF    "DI")])
 145
 146 ;; Word size for same size conversion: "w" for V4SF, "d" for V2DF and DF
 147 (define_mode_attr VSc [(V4SF "w")
 148                        (V2DF "d")
 149                        (DF   "d")])
 150
 151 ;; Map into either s or v, depending on whether this is a scalar or vector
 152 ;; operation.  DF is the only scalar mode listed; every vector mode maps
     ;; to "v".
 153 (define_mode_attr VSv   [(V16QI "v")
 154                          (V8HI  "v")
 155                          (V4SI  "v")
 156                          (V4SF  "v")
 157                          (V2DI  "v")
 158                          (V2DF  "v")
 159                          (V1TI  "v")
 160                          (DF    "s")])
 161
 162 ;; Appropriate type for add ops (and other simple FP ops).
     ;; VStype_simple supplies the "type" insn attribute and VSfptype_simple the
     ;; "fp_type" insn attribute of *vsx_add<mode>3 and *vsx_sub<mode>3.
 163 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 164                                  (V4SF "vecfloat")
 165                                  (DF   "fp")])
 166
 167 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 168                                    (V4SF "fp_addsub_s")
 169                                    (DF   "fp_addsub_d")])
 170
 171 ;; Appropriate type for multiply ops.
     ;; VStype_mul holds values for the "type" insn attribute; for the vector
     ;; modes they equal the VStype_simple values, only DF differs ("dmul").
     ;; VSfptype_mul supplies the "fp_type" attribute of *vsx_mul<mode>3.
 172 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 173                                  (V4SF "vecfloat")
 174                                  (DF   "dmul")])
 175
 176 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 177                                  (V4SF "fp_mul_s")
 178                                  (DF   "fp_mul_d")])
 179
 180 ;; Appropriate type for divide ops.
     ;; VStype_div supplies the "type" insn attribute and VSfptype_div the
     ;; "fp_type" insn attribute of *vsx_div<mode>3.
 181 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 182                                  (V4SF "vecfdiv")
 183                                  (DF   "ddiv")])
 184
 185 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 186                                  (V4SF "fp_div_s")
 187                                  (DF   "fp_div_d")])
 188
 189 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 190 ;; the scalar sqrt, i.e. V2DF uses the same "dsqrt" value as DF and V4SF
     ;; uses "ssqrt".  VSfptype_sqrt gives the corresponding "fp_sqrt_*" value
     ;; for each mode.
 191 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 192                                  (V4SF "ssqrt")
 193                                  (DF   "dsqrt")])
 194
 195 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 196                                  (V4SF "fp_sqrt_s")
 197                                  (DF   "fp_sqrt_d")])
 198
 199 ;; Iterator and modes for sp<->dp conversions
 200 ;; Because scalar SF values are represented internally as double, use the
 201 ;; V4SF type to represent this rather than SF.
 202 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 203
     ;; Result mode of the conversion, keyed by the source mode.
 204 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 205                                (V4SF    "V2DF")
 206                                (V2DF    "V4SF")])
 207
     ;; Instruction mnemonic of the conversion, keyed by the source mode.
 208 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 209                                 (V4SF   "xvcvspdp")
 210                                 (V2DF   "xvcvdpsp")])
 211
     ;; Value for the "type" insn attribute, keyed by the source mode.
 212 (define_mode_attr VS_spdp_type [(DF     "fp")
 213                                 (V4SF   "vecdouble")
 214                                 (V2DF   "vecdouble")])
 215
 216 ;; Map the scalar mode for a vector type, i.e. the mode of one element
     ;; (upper-case spelling)
 217 (define_mode_attr VS_scalar [(V1TI      "TI")
 218                              (V2DF      "DF")
 219                              (V2DI      "DI")
 220                              (V4SF      "SF")
 221                              (V4SI      "SI")
 222                              (V8HI      "HI")
 223                              (V16QI     "QI")])
 224
 225 ;; Map to a double-sized vector mode: same element type, twice the number
     ;; of elements
 226 (define_mode_attr VS_double [(V4SI      "V8SI")
 227                              (V4SF      "V8SF")
 228                              (V2DI      "V4DI")
 229                              (V2DF      "V4DF")
 230                              (V1TI      "V2TI")])
 231
 232 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 233 ;; to/from gprs.  Each value is a constraint string; only the two modes with
     ;; 64-bit elements are listed.
 234 (define_mode_attr VS_64dm [(V2DF        "wk")
 235                            (V2DI        "wj")])
 236
 237 ;; Map register class for 64-bit element in 128-bit vector for normal register
 238 ;; to register moves.  The values equal the VSr constraints of the element
     ;; modes (DF -> "ws", DI -> "wi").
 239 (define_mode_attr VS_64reg [(V2DF       "ws")
 240                             (V2DI       "wi")])
 241
 242 ;; Constants for creating unspecs.
     ;; These names are added to the "unspec" C enumeration.  Of the names below,
     ;; UNSPEC_VSX_MULSD is used by vsx_mul_v2di and UNSPEC_VSX_DIVSD by
     ;; vsx_div_v2di.
 243 (define_c_enum "unspec"
 244   [UNSPEC_VSX_CONCAT
 245    UNSPEC_VSX_CVDPSXWS
 246    UNSPEC_VSX_CVDPUXWS
 247    UNSPEC_VSX_CVSPDP
 248    UNSPEC_VSX_CVSPDPN
 249    UNSPEC_VSX_CVDPSPN
 250    UNSPEC_VSX_CVSXWDP
 251    UNSPEC_VSX_CVUXWDP
 252    UNSPEC_VSX_CVSXDSP
 253    UNSPEC_VSX_CVUXDSP
 254    UNSPEC_VSX_CVSPSXDS
 255    UNSPEC_VSX_CVSPUXDS
 256    UNSPEC_VSX_TDIV
 257    UNSPEC_VSX_TSQRT
 258    UNSPEC_VSX_SET
 259    UNSPEC_VSX_ROUND_I
 260    UNSPEC_VSX_ROUND_IC
 261    UNSPEC_VSX_SLDWI
 262    UNSPEC_VSX_XXSPLTW
 263    UNSPEC_VSX_XXSPLTD
 264    UNSPEC_VSX_DIVSD
 265    UNSPEC_VSX_DIVUD
 266    UNSPEC_VSX_MULSD
 267    UNSPEC_VSX_XVCVSXDDP
 268    UNSPEC_VSX_XVCVUXDDP
 269    UNSPEC_VSX_XVCVDPSXDS
 270    UNSPEC_VSX_XVCVDPUXDS
 271   ])
 272
 273 ;; VSX moves
 274
 275 ;; The patterns for LE permuted loads and stores come before the general
 276 ;; VSX moves so they match first.
 277 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 278   [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
 279         (match_operand:VSX_LE 1 "memory_operand" "Z"))]
 280   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 281   "#"
 282   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 283   [(set (match_dup 2)
 284         (vec_select:<MODE>
 285           (match_dup 1)
 286           (parallel [(const_int 1) (const_int 0)])))
 287    (set (match_dup 0)
 288         (vec_select:<MODE>
 289           (match_dup 2)
 290           (parallel [(const_int 1) (const_int 0)])))]
 291   "
 292 {
 293   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 294                                        : operands[0];
 295 }
 296   "
 297   [(set_attr "type" "vecload")
 298    (set_attr "length" "8")])
 299
 300 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 301   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 302         (match_operand:VSX_W 1 "memory_operand" "Z"))]
 303   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 304   "#"
 305   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 306   [(set (match_dup 2)
 307         (vec_select:<MODE>
 308           (match_dup 1)
 309           (parallel [(const_int 2) (const_int 3)
 310                      (const_int 0) (const_int 1)])))
 311    (set (match_dup 0)
 312         (vec_select:<MODE>
 313           (match_dup 2)
 314           (parallel [(const_int 2) (const_int 3)
 315                      (const_int 0) (const_int 1)])))]
 316   "
 317 {
 318   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 319                                        : operands[0];
 320 }
 321   "
 322   [(set_attr "type" "vecload")
 323    (set_attr "length" "8")])
 324
 325 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 326   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 327         (match_operand:V8HI 1 "memory_operand" "Z"))]
 328   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 329   "#"
 330   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 331   [(set (match_dup 2)
 332         (vec_select:V8HI
 333           (match_dup 1)
 334           (parallel [(const_int 4) (const_int 5)
 335                      (const_int 6) (const_int 7)
 336                      (const_int 0) (const_int 1)
 337                      (const_int 2) (const_int 3)])))
 338    (set (match_dup 0)
 339         (vec_select:V8HI
 340           (match_dup 2)
 341           (parallel [(const_int 4) (const_int 5)
 342                      (const_int 6) (const_int 7)
 343                      (const_int 0) (const_int 1)
 344                      (const_int 2) (const_int 3)])))]
 345   "
 346 {
 347   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 348                                        : operands[0];
 349 }
 350   "
 351   [(set_attr "type" "vecload")
 352    (set_attr "length" "8")])
 353
 354 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 355   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 356         (match_operand:V16QI 1 "memory_operand" "Z"))]
 357   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 358   "#"
 359   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 360   [(set (match_dup 2)
 361         (vec_select:V16QI
 362           (match_dup 1)
 363           (parallel [(const_int 8) (const_int 9)
 364                      (const_int 10) (const_int 11)
 365                      (const_int 12) (const_int 13)
 366                      (const_int 14) (const_int 15)
 367                      (const_int 0) (const_int 1)
 368                      (const_int 2) (const_int 3)
 369                      (const_int 4) (const_int 5)
 370                      (const_int 6) (const_int 7)])))
 371    (set (match_dup 0)
 372         (vec_select:V16QI
 373           (match_dup 2)
 374           (parallel [(const_int 8) (const_int 9)
 375                      (const_int 10) (const_int 11)
 376                      (const_int 12) (const_int 13)
 377                      (const_int 14) (const_int 15)
 378                      (const_int 0) (const_int 1)
 379                      (const_int 2) (const_int 3)
 380                      (const_int 4) (const_int 5)
 381                      (const_int 6) (const_int 7)])))]
 382   "
 383 {
 384   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 385                                        : operands[0];
 386 }
 387   "
 388   [(set_attr "type" "vecload")
 389    (set_attr "length" "8")])
 390
 391 (define_insn "*vsx_le_perm_store_<mode>"
 392   [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
 393         (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
 394   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 395   "#"
 396   [(set_attr "type" "vecstore")
 397    (set_attr "length" "12")])
 398
 399 (define_split
 400   [(set (match_operand:VSX_LE 0 "memory_operand" "")
 401         (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
 402   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 403   [(set (match_dup 2)
 404         (vec_select:<MODE>
 405           (match_dup 1)
 406           (parallel [(const_int 1) (const_int 0)])))
 407    (set (match_dup 0)
 408         (vec_select:<MODE>
 409           (match_dup 2)
 410           (parallel [(const_int 1) (const_int 0)])))]
 411 {
 412   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 413                                        : operands[1];
 414 })
 415
 416 ;; The post-reload split requires that we re-permute the source
 417 ;; register in case it is still live.
 418 (define_split
 419   [(set (match_operand:VSX_LE 0 "memory_operand" "")
 420         (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
 421   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 422   [(set (match_dup 1)
 423         (vec_select:<MODE>
 424           (match_dup 1)
 425           (parallel [(const_int 1) (const_int 0)])))
 426    (set (match_dup 0)
 427         (vec_select:<MODE>
 428           (match_dup 1)
 429           (parallel [(const_int 1) (const_int 0)])))
 430    (set (match_dup 1)
 431         (vec_select:<MODE>
 432           (match_dup 1)
 433           (parallel [(const_int 1) (const_int 0)])))]
 434   "")
 435
 436 (define_insn "*vsx_le_perm_store_<mode>"
 437   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 438         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
 439   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 440   "#"
 441   [(set_attr "type" "vecstore")
 442    (set_attr "length" "12")])
 443
 444 (define_split
 445   [(set (match_operand:VSX_W 0 "memory_operand" "")
 446         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 447   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 448   [(set (match_dup 2)
 449         (vec_select:<MODE>
 450           (match_dup 1)
 451           (parallel [(const_int 2) (const_int 3)
 452                      (const_int 0) (const_int 1)])))
 453    (set (match_dup 0)
 454         (vec_select:<MODE>
 455           (match_dup 2)
 456           (parallel [(const_int 2) (const_int 3)
 457                      (const_int 0) (const_int 1)])))]
 458 {
 459   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 460                                        : operands[1];
 461 })
 462
 463 ;; The post-reload split requires that we re-permute the source
 464 ;; register in case it is still live.
 465 (define_split
 466   [(set (match_operand:VSX_W 0 "memory_operand" "")
 467         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 468   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 469   [(set (match_dup 1)
 470         (vec_select:<MODE>
 471           (match_dup 1)
 472           (parallel [(const_int 2) (const_int 3)
 473                      (const_int 0) (const_int 1)])))
 474    (set (match_dup 0)
 475         (vec_select:<MODE>
 476           (match_dup 1)
 477           (parallel [(const_int 2) (const_int 3)
 478                      (const_int 0) (const_int 1)])))
 479    (set (match_dup 1)
 480         (vec_select:<MODE>
 481           (match_dup 1)
 482           (parallel [(const_int 2) (const_int 3)
 483                      (const_int 0) (const_int 1)])))]
 484   "")
 485
 486 (define_insn "*vsx_le_perm_store_v8hi"
 487   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 488         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 489   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 490   "#"
 491   [(set_attr "type" "vecstore")
 492    (set_attr "length" "12")])
 493
 494 (define_split
 495   [(set (match_operand:V8HI 0 "memory_operand" "")
 496         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 497   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 498   [(set (match_dup 2)
 499         (vec_select:V8HI
 500           (match_dup 1)
 501           (parallel [(const_int 4) (const_int 5)
 502                      (const_int 6) (const_int 7)
 503                      (const_int 0) (const_int 1)
 504                      (const_int 2) (const_int 3)])))
 505    (set (match_dup 0)
 506         (vec_select:V8HI
 507           (match_dup 2)
 508           (parallel [(const_int 4) (const_int 5)
 509                      (const_int 6) (const_int 7)
 510                      (const_int 0) (const_int 1)
 511                      (const_int 2) (const_int 3)])))]
 512 {
 513   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 514                                        : operands[1];
 515 })
 516
 517 ;; The post-reload split requires that we re-permute the source
 518 ;; register in case it is still live.
 519 (define_split
 520   [(set (match_operand:V8HI 0 "memory_operand" "")
 521         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 522   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 523   [(set (match_dup 1)
 524         (vec_select:V8HI
 525           (match_dup 1)
 526           (parallel [(const_int 4) (const_int 5)
 527                      (const_int 6) (const_int 7)
 528                      (const_int 0) (const_int 1)
 529                      (const_int 2) (const_int 3)])))
 530    (set (match_dup 0)
 531         (vec_select:V8HI
 532           (match_dup 1)
 533           (parallel [(const_int 4) (const_int 5)
 534                      (const_int 6) (const_int 7)
 535                      (const_int 0) (const_int 1)
 536                      (const_int 2) (const_int 3)])))
 537    (set (match_dup 1)
 538         (vec_select:V8HI
 539           (match_dup 1)
 540           (parallel [(const_int 4) (const_int 5)
 541                      (const_int 6) (const_int 7)
 542                      (const_int 0) (const_int 1)
 543                      (const_int 2) (const_int 3)])))]
 544   "")
 545
 546 (define_insn "*vsx_le_perm_store_v16qi"
 547   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
 548         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 549   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 550   "#"
 551   [(set_attr "type" "vecstore")
 552    (set_attr "length" "12")])
 553
 554 (define_split
 555   [(set (match_operand:V16QI 0 "memory_operand" "")
 556         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 557   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 558   [(set (match_dup 2)
 559         (vec_select:V16QI
 560           (match_dup 1)
 561           (parallel [(const_int 8) (const_int 9)
 562                      (const_int 10) (const_int 11)
 563                      (const_int 12) (const_int 13)
 564                      (const_int 14) (const_int 15)
 565                      (const_int 0) (const_int 1)
 566                      (const_int 2) (const_int 3)
 567                      (const_int 4) (const_int 5)
 568                      (const_int 6) (const_int 7)])))
 569    (set (match_dup 0)
 570         (vec_select:V16QI
 571           (match_dup 2)
 572           (parallel [(const_int 8) (const_int 9)
 573                      (const_int 10) (const_int 11)
 574                      (const_int 12) (const_int 13)
 575                      (const_int 14) (const_int 15)
 576                      (const_int 0) (const_int 1)
 577                      (const_int 2) (const_int 3)
 578                      (const_int 4) (const_int 5)
 579                      (const_int 6) (const_int 7)])))]
 580 {
 581   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 582                                        : operands[1];
 583 })
 584
 585 ;; The post-reload split requires that we re-permute the source
 586 ;; register in case it is still live.
 587 (define_split
 588   [(set (match_operand:V16QI 0 "memory_operand" "")
 589         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 590   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 591   [(set (match_dup 1)
 592         (vec_select:V16QI
 593           (match_dup 1)
 594           (parallel [(const_int 8) (const_int 9)
 595                      (const_int 10) (const_int 11)
 596                      (const_int 12) (const_int 13)
 597                      (const_int 14) (const_int 15)
 598                      (const_int 0) (const_int 1)
 599                      (const_int 2) (const_int 3)
 600                      (const_int 4) (const_int 5)
 601                      (const_int 6) (const_int 7)])))
 602    (set (match_dup 0)
 603         (vec_select:V16QI
 604           (match_dup 1)
 605           (parallel [(const_int 8) (const_int 9)
 606                      (const_int 10) (const_int 11)
 607                      (const_int 12) (const_int 13)
 608                      (const_int 14) (const_int 15)
 609                      (const_int 0) (const_int 1)
 610                      (const_int 2) (const_int 3)
 611                      (const_int 4) (const_int 5)
 612                      (const_int 6) (const_int 7)])))
 613    (set (match_dup 1)
 614         (vec_select:V16QI
 615           (match_dup 1)
 616           (parallel [(const_int 8) (const_int 9)
 617                      (const_int 10) (const_int 11)
 618                      (const_int 12) (const_int 13)
 619                      (const_int 14) (const_int 15)
 620                      (const_int 0) (const_int 1)
 621                      (const_int 2) (const_int 3)
 622                      (const_int 4) (const_int 5)
 623                      (const_int 6) (const_int 7)])))]
 624   "")
 625
 626
 627 (define_insn "*vsx_mov<mode>"
 628   [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?<VSa>,?<VSa>,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ, v")
 629         (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,<VSa>,Z,<VSa>,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
 630   "VECTOR_MEM_VSX_P (<MODE>mode)
 631    && (register_operand (operands[0], <MODE>mode)
 632        || register_operand (operands[1], <MODE>mode))"
 633 {
 634   return rs6000_output_move_128bit (operands);
 635 }
 636   [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
 637    (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
 638
 639 ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
 640 ;; use of TImode is for unions.  However for plain data movement, slightly
 641 ;; favor the vector loads.
     ;; The assembler text comes from rs6000_output_move_128bit.  The "type"
     ;; and "length" attributes list one value per alternative in constraint
     ;; order; in particular alternative 5 (v <- wZ) is a vector load and
     ;; alternative 6 (wZ <- v) is a vector store.
 642 (define_insn "*vsx_movti_64bit"
 643   [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
 644         (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
 645   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
 646    && (register_operand (operands[0], TImode)
 647        || register_operand (operands[1], TImode))"
 648 {
 649   return rs6000_output_move_128bit (operands);
 650 }
 651   [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecload,vecstore,store,load,store,load,*,*")
 652    (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
 653
 654 (define_insn "*vsx_movti_32bit"
 655   [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
 656         (match_operand:TI 1 "input_operand"        "wa, Z,wa, O,W,wZ, v,r,r,    Q,    Y,    r,n"))]
 657   "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
 658    && (register_operand (operands[0], TImode)
 659        || register_operand (operands[1], TImode))"
 660 {
 661   switch (which_alternative)
 662     {
 663     case 0:
 664       return "stxvd2x %x1,%y0";
 665
 666     case 1:
 667       return "lxvd2x %x0,%y1";
 668
 669     case 2:
 670       return "xxlor %x0,%x1,%x1";
 671
 672     case 3:
 673       return "xxlxor %x0,%x0,%x0";
 674
 675     case 4:
 676       return output_vec_const_move (operands);
 677
 678     case 5:
 679       return "stvx %1,%y0";
 680
 681     case 6:
 682       return "lvx %0,%y1";
 683
 684     case 7:
 685       if (TARGET_STRING)
 686         return \"stswi %1,%P0,16\";
 687
 688     case 8:
 689       return \"#\";
 690
 691     case 9:
 692       /* If the address is not used in the output, we can use lsi.  Otherwise,
 693          fall through to generating four loads.  */
 694       if (TARGET_STRING
 695           && ! reg_overlap_mentioned_p (operands[0], operands[1]))
 696         return \"lswi %0,%P1,16\";
 697       /* ... fall through ...  */
 698
 699     case 10:
 700     case 11:
 701     case 12:
 702       return \"#\";
 703     default:
 704       gcc_unreachable ();
 705     }
 706 }
 707   [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,store,load,load, *, *")
 708    (set_attr "update" "     *,      *,        *,       *,         *,       *,      *,  yes,  yes, yes, yes, *, *")
 709    (set_attr "length" "     4,      4,        4,       4,         8,       4,      4,   16,   16,  16,  16,16,16")
 710    (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
 711                                           (const_string "always")
 712                                           (const_string "conditional")))])
 713
 714 ;; Explicit load/store expanders for the builtin functions.
     ;; vsx_load_<mode> expands to a plain set of a VSX register from memory and
     ;; has no preparation code; it is available when VECTOR_MEM_VSX_P holds
     ;; for the mode.
 715 (define_expand "vsx_load_<mode>"
 716   [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
 717         (match_operand:VSX_M 1 "memory_operand" ""))]
 718   "VECTOR_MEM_VSX_P (<MODE>mode)"
 719   "")
 720
     ;; Store counterpart of vsx_load_<mode>: expands to a plain set of memory
     ;; from a VSX register, with no preparation code.
 721 (define_expand "vsx_store_<mode>"
 722   [(set (match_operand:VSX_M 0 "memory_operand" "")
 723         (match_operand:VSX_M 1 "vsx_register_operand" ""))]
 724   "VECTOR_MEM_VSX_P (<MODE>mode)"
 725   "")
 726
 727 \f
 728 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
 729 ;; instructions are now combined with the insn for the traditional floating
 730 ;; point unit.
     ;; Vector floating point add for V4SF/V2DF: xvaddsp or xvadddp.
     ;; First alternative uses the preferred <VSr> registers, the second
     ;; (disparaged with '?') any <VSa> register.
 731 (define_insn "*vsx_add<mode>3"
 732   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 733         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
 734                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
 735   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 736   "xvadd<VSs> %x0,%x1,%x2"
 737   [(set_attr "type" "<VStype_simple>")
 738    (set_attr "fp_type" "<VSfptype_simple>")])
 739
     ;; Vector floating point subtract for V4SF/V2DF: xvsubsp or xvsubdp.
     ;; First alternative uses the preferred <VSr> registers, the second
     ;; (disparaged with '?') any <VSa> register.
 740 (define_insn "*vsx_sub<mode>3"
 741   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 742         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
 743                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
 744   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 745   "xvsub<VSs> %x0,%x1,%x2"
 746   [(set_attr "type" "<VStype_simple>")
 747    (set_attr "fp_type" "<VSfptype_simple>")])
 748
 749 (define_insn "*vsx_mul<mode>3"
 750   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 751         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
 752                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
 753   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 754   "xvmul<VSs> %x0,%x1,%x2"
 755   [(set_attr "type" "<VStype_simple>")
 756    (set_attr "fp_type" "<VSfptype_mul>")])
 757
 758 ; Emulate vector with scalar for vec_mul in V2DImode.
     ; The insn is output as "#" and split before reload: both DImode elements
     ; of each input are extracted, multiplied with muldi3, and the two
     ; products are recombined with vsx_concat_v2di.  The preparation code
     ; emits the whole sequence itself and ends with DONE, so the
     ; (const_int 0) placeholder template is never emitted.
 759 (define_insn_and_split "vsx_mul_v2di"
 760   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
 761         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
 762                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
 763                      UNSPEC_VSX_MULSD))]
 764   "VECTOR_MEM_VSX_P (V2DImode)"
 765   "#"
 766   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
 767   [(const_int 0)]
 768   "
 769 {
 770   rtx op0 = operands[0];
 771   rtx op1 = operands[1];
 772   rtx op2 = operands[2];
 773   rtx op3 = gen_reg_rtx (DImode);
 774   rtx op4 = gen_reg_rtx (DImode);
 775   rtx op5 = gen_reg_rtx (DImode);
       /* Element 0: op5 = op1[0] * op2[0].  */
 776   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
 777   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
 778   emit_insn (gen_muldi3 (op5, op3, op4));
       /* Element 1: op3 = op1[1] * op2[1], reusing op3 for the product.  */
 779   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
 780   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
 781   emit_insn (gen_muldi3 (op3, op3, op4));
 782   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
       DONE;
 783 }"
 784   [(set_attr "type" "mul")])
 785
 786 (define_insn "*vsx_div<mode>3"
       ;; Vector floating-point divide: matches (div op1 op2) in each VSX_F
       ;; mode and emits xvdiv<VSs>.  Scheduling attributes come from the
       ;; divide-specific <VStype_div> / <VSfptype_div> mode attributes.
 787   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 788         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
 789                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
 790   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 791   "xvdiv<VSs> %x0,%x1,%x2"
 792   [(set_attr "type" "<VStype_div>")
 793    (set_attr "fp_type" "<VSfptype_div>")])
 794
 795 ; Emulate vector with scalar for vec_div in V2DImode
     ; Signed variant.  The insn outputs "#" and is split before reload:
     ; both elements of operands 1 and 2 are extracted into DImode pseudos,
     ; divided element-wise with divdi3, and the quotients are rejoined with
     ; vsx_concat_v2di.  New pseudos are created, so the split is restricted
     ; to !reload_completed && !reload_in_progress.
 796 (define_insn_and_split "vsx_div_v2di"
 797   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
 798         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
 799                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
 800                      UNSPEC_VSX_DIVSD))]
 801   "VECTOR_MEM_VSX_P (V2DImode)"
 802   "#"
 803   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
 804   [(const_int 0)]
 805   "
 806 {
 807   rtx op0 = operands[0];
 808   rtx op1 = operands[1];
 809   rtx op2 = operands[2];
 810   rtx op3 = gen_reg_rtx (DImode);
 811   rtx op4 = gen_reg_rtx (DImode);
 812   rtx op5 = gen_reg_rtx (DImode);
 813   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
 814   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
 815   emit_insn (gen_divdi3 (op5, op3, op4));
 816   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
 817   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
 818   emit_insn (gen_divdi3 (op3, op3, op4));
 819   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
 820 }"
 821   [(set_attr "type" "div")])
 822
 823 (define_insn_and_split "vsx_udiv_v2di"
       ;; Unsigned V2DI divide emulated with scalar code.  The insn outputs
       ;; "#" and is split before reload: both elements of operands 1 and 2
       ;; are extracted into DImode pseudos, divided with udivdi3, and the
       ;; quotients are rejoined with vsx_concat_v2di.  The split creates
       ;; pseudos, so it must not run once reload has started.
 824   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
 825         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
 826                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
 827                      UNSPEC_VSX_DIVUD))]
 828   "VECTOR_MEM_VSX_P (V2DImode)"
 829   "#"
 830   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
 831   [(const_int 0)]
 832   "
 833 {
 834   rtx op0 = operands[0];
 835   rtx op1 = operands[1];
 836   rtx op2 = operands[2];
 837   rtx op3 = gen_reg_rtx (DImode);
 838   rtx op4 = gen_reg_rtx (DImode);
 839   rtx op5 = gen_reg_rtx (DImode);
 840   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
 841   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
 842   emit_insn (gen_udivdi3 (op5, op3, op4));
 843   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
 844   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
 845   emit_insn (gen_udivdi3 (op3, op3, op4));
 846   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
 847 }"
 848   [(set_attr "type" "div")])
 849
 850 ;; *tdiv* instruction returning the FG flag
     ;; Expands to two sets: the UNSPEC_VSX_TDIV test of operands 1 and 2
     ;; written to a fresh CCFP pseudo (operand 3, created in the preparation
     ;; code), followed by SI operand 0 = (gt cc 0).
 851 (define_expand "vsx_tdiv<mode>3_fg"
 852   [(set (match_dup 3)
 853         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
 854                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
 855                      UNSPEC_VSX_TDIV))
 856    (set (match_operand:SI 0 "gpc_reg_operand" "")
 857         (gt:SI (match_dup 3)
 858                (const_int 0)))]
 859   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 860 {
 861   operands[3] = gen_reg_rtx (CCFPmode);
 862 })
 863
 864 ;; *tdiv* instruction returning the FE flag
     ;; Same shape as the FG expander, but SI operand 0 is set from
     ;; (eq cc 0) on the fresh CCFP pseudo (operand 3) instead of (gt cc 0).
 865 (define_expand "vsx_tdiv<mode>3_fe"
 866   [(set (match_dup 3)
 867         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
 868                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
 869                      UNSPEC_VSX_TDIV))
 870    (set (match_operand:SI 0 "gpc_reg_operand" "")
 871         (eq:SI (match_dup 3)
 872                (const_int 0)))]
 873   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 874 {
 875   operands[3] = gen_reg_rtx (CCFPmode);
 876 })
 877
 878 (define_insn "*vsx_tdiv<mode>3_internal"
       ;; The test-for-divide instruction matched by the first set of the
       ;; vsx_tdiv<mode>3_fg/_fe expanders: writes a CCFP condition register
       ;; (operand 0, no %x prefix) from two VSX_B inputs (DF, V4SF or V2DF).
       ;; <VSv> selects the scalar/vector letter of the mnemonic.
 879   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
 880         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
 881                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
 882                    UNSPEC_VSX_TDIV))]
 883   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 884   "x<VSv>tdiv<VSs> %0,%x1,%x2"
 885   [(set_attr "type" "<VStype_simple>")
 886    (set_attr "fp_type" "<VSfptype_simple>")])
 887
 888 (define_insn "vsx_fre<mode>2"
       ;; Vector reciprocal estimate: represented as UNSPEC_FRES of operand 1
       ;; in a VSX_F mode and emitted as xvre<VSs>.
 889   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 890         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
 891                       UNSPEC_FRES))]
 892   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 893   "xvre<VSs> %x0,%x1"
 894   [(set_attr "type" "<VStype_simple>")
 895    (set_attr "fp_type" "<VSfptype_simple>")])
 896
 897 (define_insn "*vsx_neg<mode>2"
       ;; Vector floating-point negate: (neg op1) in a VSX_F mode, emitted as
       ;; xvneg<VSs>.
 898   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 899         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
 900   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 901   "xvneg<VSs> %x0,%x1"
 902   [(set_attr "type" "<VStype_simple>")
 903    (set_attr "fp_type" "<VSfptype_simple>")])
 904
 905 (define_insn "*vsx_abs<mode>2"
       ;; Vector floating-point absolute value: (abs op1) in a VSX_F mode,
       ;; emitted as xvabs<VSs>.
 906   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 907         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
 908   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 909   "xvabs<VSs> %x0,%x1"
 910   [(set_attr "type" "<VStype_simple>")
 911    (set_attr "fp_type" "<VSfptype_simple>")])
 912
 913 (define_insn "vsx_nabs<mode>2"
       ;; Vector negative absolute value: matches the nested RTL
       ;; (neg (abs op1)) in a VSX_F mode and emits the single instruction
       ;; xvnabs<VSs>.
 914   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 915         (neg:VSX_F
 916          (abs:VSX_F
 917           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
 918   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 919   "xvnabs<VSs> %x0,%x1"
 920   [(set_attr "type" "<VStype_simple>")
 921    (set_attr "fp_type" "<VSfptype_simple>")])
 922
 923 (define_insn "vsx_smax<mode>3"
       ;; Vector floating-point maximum: (smax op1 op2) in a VSX_F mode,
       ;; emitted as xvmax<VSs>.
 924   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 925         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
 926                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
 927   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 928   "xvmax<VSs> %x0,%x1,%x2"
 929   [(set_attr "type" "<VStype_simple>")
 930    (set_attr "fp_type" "<VSfptype_simple>")])
 931
 932 (define_insn "*vsx_smin<mode>3"
       ;; Vector floating-point minimum: (smin op1 op2) in a VSX_F mode,
       ;; emitted as xvmin<VSs>.
 933   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 934         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
 935                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
 936   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 937   "xvmin<VSs> %x0,%x1,%x2"
 938   [(set_attr "type" "<VStype_simple>")
 939    (set_attr "fp_type" "<VSfptype_simple>")])
 940
 941 (define_insn "*vsx_sqrt<mode>2"
       ;; Vector floating-point square root: (sqrt op1) in a VSX_F mode,
       ;; emitted as xvsqrt<VSs>.  Scheduling attributes come from the
       ;; sqrt-specific <VStype_sqrt> / <VSfptype_sqrt> mode attributes.
 942   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 943         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
 944   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 945   "xvsqrt<VSs> %x0,%x1"
 946   [(set_attr "type" "<VStype_sqrt>")
 947    (set_attr "fp_type" "<VSfptype_sqrt>")])
 948
 949 (define_insn "*vsx_rsqrte<mode>2"
       ;; Vector reciprocal square-root estimate: represented as UNSPEC_RSQRT
       ;; of operand 1 in a VSX_F mode and emitted as xvrsqrte<VSs>.
 950   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
 951         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
 952                       UNSPEC_RSQRT))]
 953   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 954   "xvrsqrte<VSs> %x0,%x1"
 955   [(set_attr "type" "<VStype_simple>")
 956    (set_attr "fp_type" "<VSfptype_simple>")])
 957
 958 ;; *tsqrt* returning the fg flag
     ;; Expands to the UNSPEC_VSX_TSQRT test of operand 1 written to a fresh
     ;; CCFP pseudo (operand 3, created in the preparation code), followed by
     ;; SI operand 0 = (gt cc 0).  There is no operand 2 in this pattern.
 959 (define_expand "vsx_tsqrt<mode>2_fg"
 960   [(set (match_dup 3)
 961         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
 962                      UNSPEC_VSX_TSQRT))
 963    (set (match_operand:SI 0 "gpc_reg_operand" "")
 964         (gt:SI (match_dup 3)
 965                (const_int 0)))]
 966   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 967 {
 968   operands[3] = gen_reg_rtx (CCFPmode);
 969 })
 970
 971 ;; *tsqrt* returning the fe flag
     ;; Same shape as the fg expander, but SI operand 0 is set from
     ;; (eq cc 0) on the fresh CCFP pseudo (operand 3) instead of (gt cc 0).
 972 (define_expand "vsx_tsqrt<mode>2_fe"
 973   [(set (match_dup 3)
 974         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
 975                      UNSPEC_VSX_TSQRT))
 976    (set (match_operand:SI 0 "gpc_reg_operand" "")
 977         (eq:SI (match_dup 3)
 978                (const_int 0)))]
 979   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 980 {
 981   operands[3] = gen_reg_rtx (CCFPmode);
 982 })
 983
 984 (define_insn "*vsx_tsqrt<mode>2_internal"
       ;; The test-for-square-root instruction matched by the first set of
       ;; the vsx_tsqrt<mode>2_fg/_fe expanders: writes a CCFP condition
       ;; register (operand 0) from one VSX_B input.
 985   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
 986         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
 987                      UNSPEC_VSX_TSQRT))]
 988   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 989   "x<VSv>tsqrt<VSs> %0,%x1"
 990   [(set_attr "type" "<VStype_simple>")
 991    (set_attr "fp_type" "<VSfptype_simple>")])
 992
 993 ;; Fused vector multiply/add instructions. Support the classical Altivec
 994 ;; versions of fma, which allows the target to be a separate register from the
 995 ;; 3 inputs.  Under VSX, the target must be either the addend or the first
 996 ;; multiply.
 997
 998 (define_insn "*vsx_fmav4sf4"
       ;; V4SF fused multiply-add, (fma op1 op2 op3).  Alternatives 1 and 3
       ;; tie the addend (operand 3) to the destination and use xvmaddasp;
       ;; alternatives 2 and 4 tie operand 2 to the destination and use
       ;; xvmaddmsp with operand 3 as the remaining source.  The last
       ;; alternative uses only `v' registers and the four-operand vmaddfp,
       ;; so no input has to share the destination register.
 999   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1000         (fma:V4SF
1001           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1002           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1003           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1004   "VECTOR_UNIT_VSX_P (V4SFmode)"
1005   "@
1006    xvmaddasp %x0,%x1,%x2
1007    xvmaddmsp %x0,%x1,%x3
1008    xvmaddasp %x0,%x1,%x2
1009    xvmaddmsp %x0,%x1,%x3
1010    vmaddfp %0,%1,%2,%3"
1011   [(set_attr "type" "vecfloat")])
1012
1013 (define_insn "*vsx_fmav2df4"
       ;; V2DF fused multiply-add, (fma op1 op2 op3).  Alternatives 1 and 3
       ;; tie the addend (operand 3) to the destination and use xvmaddadp;
       ;; alternatives 2 and 4 tie operand 2 to the destination and use
       ;; xvmaddmdp.  Unlike the V4SF pattern there is no `v'-register
       ;; alternative.
1014   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1015         (fma:V2DF
1016           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1017           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1018           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1019   "VECTOR_UNIT_VSX_P (V2DFmode)"
1020   "@
1021    xvmaddadp %x0,%x1,%x2
1022    xvmaddmdp %x0,%x1,%x3
1023    xvmaddadp %x0,%x1,%x2
1024    xvmaddmdp %x0,%x1,%x3"
1025   [(set_attr "type" "vecdouble")])
1026
1027 (define_insn "*vsx_fms<mode>4"
       ;; Vector fused multiply-subtract, (fma op1 op2 (neg op3)), for both
       ;; VSX_F modes.  The "a" form (xvmsuba) is used when operand 3 is tied
       ;; to the destination, the "m" form (xvmsubm) when operand 2 is.
       ;; Only the "type" attribute is set here; no "fp_type" is given.
1028   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1029         (fma:VSX_F
1030           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1031           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1032           (neg:VSX_F
1033             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1034   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1035   "@
1036    xvmsuba<VSs> %x0,%x1,%x2
1037    xvmsubm<VSs> %x0,%x1,%x3
1038    xvmsuba<VSs> %x0,%x1,%x2
1039    xvmsubm<VSs> %x0,%x1,%x3"
1040   [(set_attr "type" "<VStype_mul>")])
1041
1042 (define_insn "*vsx_nfma<mode>4"
       ;; Vector negated fused multiply-add, (neg (fma op1 op2 op3)), for
       ;; both VSX_F modes.  xvnmadda is used when operand 3 is tied to the
       ;; destination, xvnmaddm when operand 2 is.
       ;; NOTE(review): operand 1 has no `%' commutative marker here, unlike
       ;; the sibling fma/fms/nfms patterns -- confirm whether intentional.
1043   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1044         (neg:VSX_F
1045          (fma:VSX_F
1046           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1047           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1048           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1049   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1050   "@
1051    xvnmadda<VSs> %x0,%x1,%x2
1052    xvnmaddm<VSs> %x0,%x1,%x3
1053    xvnmadda<VSs> %x0,%x1,%x2
1054    xvnmaddm<VSs> %x0,%x1,%x3"
1055   [(set_attr "type" "<VStype_mul>")
1056    (set_attr "fp_type" "<VSfptype_mul>")])
1057
1058 (define_insn "*vsx_nfmsv4sf4"
       ;; V4SF negated fused multiply-subtract,
       ;; (neg (fma op1 op2 (neg op3))).  xvnmsubasp is used when operand 3
       ;; is tied to the destination, xvnmsubmsp when operand 2 is; the last
       ;; alternative uses only `v' registers and the four-operand vnmsubfp.
1059   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1060         (neg:V4SF
1061          (fma:V4SF
1062            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1063            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1064            (neg:V4SF
1065              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1066   "VECTOR_UNIT_VSX_P (V4SFmode)"
1067   "@
1068    xvnmsubasp %x0,%x1,%x2
1069    xvnmsubmsp %x0,%x1,%x3
1070    xvnmsubasp %x0,%x1,%x2
1071    xvnmsubmsp %x0,%x1,%x3
1072    vnmsubfp %0,%1,%2,%3"
1073   [(set_attr "type" "vecfloat")])
1074
1075 (define_insn "*vsx_nfmsv2df4"
       ;; V2DF negated fused multiply-subtract,
       ;; (neg (fma op1 op2 (neg op3))).  xvnmsubadp is used when operand 3
       ;; is tied to the destination, xvnmsubmdp when operand 2 is.
1076   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1077         (neg:V2DF
1078          (fma:V2DF
1079            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1080            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1081            (neg:V2DF
1082              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1083   "VECTOR_UNIT_VSX_P (V2DFmode)"
1084   "@
1085    xvnmsubadp %x0,%x1,%x2
1086    xvnmsubmdp %x0,%x1,%x3
1087    xvnmsubadp %x0,%x1,%x2
1088    xvnmsubmdp %x0,%x1,%x3"
1089   [(set_attr "type" "vecdouble")])
1090
1091 ;; Vector conditional expressions (no scalar version for these instructions)
1092 (define_insn "vsx_eq<mode>"
       ;; Vector compare equal: (eq op1 op2) producing a VSX_F-mode vector
       ;; result, emitted as xvcmpeq<VSs> (the non-recording form, without
       ;; the trailing dot).
1093   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1094         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1095                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1096   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1097   "xvcmpeq<VSs> %x0,%x1,%x2"
1098   [(set_attr "type" "<VStype_simple>")
1099    (set_attr "fp_type" "<VSfptype_simple>")])
1100
1101 (define_insn "vsx_gt<mode>"
       ;; Vector compare greater-than: (gt op1 op2) producing a VSX_F-mode
       ;; vector result, emitted as xvcmpgt<VSs>.
1102   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1103         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1104                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1105   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1106   "xvcmpgt<VSs> %x0,%x1,%x2"
1107   [(set_attr "type" "<VStype_simple>")
1108    (set_attr "fp_type" "<VSfptype_simple>")])
1109
1110 (define_insn "*vsx_ge<mode>"
       ;; Vector compare greater-or-equal: (ge op1 op2) producing a
       ;; VSX_F-mode vector result, emitted as xvcmpge<VSs>.
1111   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1112         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1113                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1114   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1115   "xvcmpge<VSs> %x0,%x1,%x2"
1116   [(set_attr "type" "<VStype_simple>")
1117    (set_attr "fp_type" "<VSfptype_simple>")])
1118
1119 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1120 ;; indicate a combined status
1121 (define_insn "*vsx_eq_<mode>_p"
       ;; Recording form of compare-equal (xvcmpeq<VSs>. with trailing dot):
       ;; one instruction sets hard register 74 in CC mode through an
       ;; UNSPEC_PREDICATE wrapper and also writes the vector comparison
       ;; result to operand 0.  The second set reuses operands 1 and 2 via
       ;; match_dup.
1122   [(set (reg:CC 74)
1123         (unspec:CC
1124          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1125                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1126          UNSPEC_PREDICATE))
1127    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1128         (eq:VSX_F (match_dup 1)
1129                   (match_dup 2)))]
1130   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1131   "xvcmpeq<VSs>. %x0,%x1,%x2"
1132   [(set_attr "type" "<VStype_simple>")])
1133
1134 (define_insn "*vsx_gt_<mode>_p"
       ;; Recording form of compare-greater-than (xvcmpgt<VSs>.): sets hard
       ;; register 74 in CC mode through UNSPEC_PREDICATE and writes the
       ;; vector comparison result to operand 0 in the same instruction.
1135   [(set (reg:CC 74)
1136         (unspec:CC
1137          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1138                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1139          UNSPEC_PREDICATE))
1140    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1141         (gt:VSX_F (match_dup 1)
1142                   (match_dup 2)))]
1143   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1144   "xvcmpgt<VSs>. %x0,%x1,%x2"
1145   [(set_attr "type" "<VStype_simple>")])
1146
1147 (define_insn "*vsx_ge_<mode>_p"
       ;; Recording form of compare-greater-or-equal (xvcmpge<VSs>.): sets
       ;; hard register 74 in CC mode through UNSPEC_PREDICATE and writes the
       ;; vector comparison result to operand 0 in the same instruction.
1148   [(set (reg:CC 74)
1149         (unspec:CC
1150          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1151                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1152          UNSPEC_PREDICATE))
1153    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1154         (ge:VSX_F (match_dup 1)
1155                   (match_dup 2)))]
1156   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1157   "xvcmpge<VSs>. %x0,%x1,%x2"
1158   [(set_attr "type" "<VStype_simple>")])
1159
1160 ;; Vector select
1161 (define_insn "*vsx_xxsel<mode>"
       ;; Vector select for every VSX_L mode: operand 0 receives operand 2
       ;; where the mask (operand 1) compares not-equal to the zero constant
       ;; (operand 4), otherwise operand 3.  Note the output template lists
       ;; the sources as %x3,%x2 followed by the mask %x1.  This variant
       ;; matches the comparison in CC mode.
1162   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1163         (if_then_else:VSX_L
1164          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1165                 (match_operand:VSX_L 4 "zero_constant" ""))
1166          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1167          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1168   "VECTOR_MEM_VSX_P (<MODE>mode)"
1169   "xxsel %x0,%x3,%x2,%x1"
1170   [(set_attr "type" "vecperm")])
1171
1172 (define_insn "*vsx_xxsel<mode>_uns"
       ;; Same vector select as *vsx_xxsel<mode>, but matching the mask
       ;; comparison in CCUNS mode instead of CC.  The emitted instruction
       ;; and operand order are identical.
1173   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1174         (if_then_else:VSX_L
1175          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1176                    (match_operand:VSX_L 4 "zero_constant" ""))
1177          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1178          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1179   "VECTOR_MEM_VSX_P (<MODE>mode)"
1180   "xxsel %x0,%x3,%x2,%x1"
1181   [(set_attr "type" "vecperm")])
1182
1183 ;; Copy sign
1184 (define_insn "vsx_copysign<mode>3"
       ;; Vector copysign, represented as UNSPEC_COPYSIGN of operands 1 and 2
       ;; and emitted as xvcpsgn<VSs>.  The output template deliberately
       ;; lists the inputs as %x2,%x1, i.e. swapped relative to the RTL
       ;; operand order (presumably because the instruction takes the sign
       ;; source first -- confirm against the ISA description).
1185   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1186         (unspec:VSX_F
1187          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1188           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1189          UNSPEC_COPYSIGN))]
1190   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1191   "xvcpsgn<VSs> %x0,%x2,%x1"
1192   [(set_attr "type" "<VStype_simple>")
1193    (set_attr "fp_type" "<VSfptype_simple>")])
1194
1195 ;; For the conversions, limit the register class for the integer value to be
1196 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1197 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1198 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1199 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md.
1200 (define_insn "vsx_float<VSi><mode>2"
       ;; Signed integer to floating point: (float op1) where op1 has the
       ;; integer mode <VSI> paired with the VSX_B result mode.  The integer
       ;; input uses the <VSr2>/<VSr3> register classes; the mnemonic is
       ;; assembled from the <VSv>, <VSc> and <VSs> mode attributes.
1201   [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1202         (float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1203   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1204   "x<VSv>cvsx<VSc><VSs> %x0,%x1"
1205   [(set_attr "type" "<VStype_simple>")
1206    (set_attr "fp_type" "<VSfptype_simple>")])
1207
1208 (define_insn "vsx_floatuns<VSi><mode>2"
       ;; Unsigned integer to floating point: (unsigned_float op1) with the
       ;; same operand layout as vsx_float<VSi><mode>2, using the "ux"
       ;; (unsigned) form of the convert mnemonic.
1209   [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1210         (unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1211   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1212   "x<VSv>cvux<VSc><VSs> %x0,%x1"
1213   [(set_attr "type" "<VStype_simple>")
1214    (set_attr "fp_type" "<VSfptype_simple>")])
1215
1216 (define_insn "vsx_fix_trunc<mode><VSi>2"
       ;; Floating point to signed integer: (fix op1) from a VSX_B mode to
       ;; the paired integer mode <VSI>.  Here the integer side is the
       ;; destination, so it carries the <VSr2>/<VSr3> constraints.
1217   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1218         (fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1219   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1220   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1221   [(set_attr "type" "<VStype_simple>")
1222    (set_attr "fp_type" "<VSfptype_simple>")])
1223
1224 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
       ;; Floating point to unsigned integer: (unsigned_fix op1) with the
       ;; same operand layout as vsx_fix_trunc<mode><VSi>2, using the "ux"
       ;; (unsigned) form of the convert mnemonic.
1225   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1226         (unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1227   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1228   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1229   [(set_attr "type" "<VStype_simple>")
1230    (set_attr "fp_type" "<VSfptype_simple>")])
1231
1232 ;; Math rounding functions
1233 (define_insn "vsx_x<VSv>r<VSs>i"
       ;; Round-to-integer instruction with the plain "i" mnemonic suffix,
       ;; represented as UNSPEC_VSX_ROUND_I.  The pattern name and the
       ;; mnemonic are both built from <VSv> and <VSs>, so each VSX_B mode
       ;; (DF, V4SF, V2DF) gets a pattern named after its instruction.
1234   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1235         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1236                       UNSPEC_VSX_ROUND_I))]
1237   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1238   "x<VSv>r<VSs>i %x0,%x1"
1239   [(set_attr "type" "<VStype_simple>")
1240    (set_attr "fp_type" "<VSfptype_simple>")])
1241
1242 (define_insn "vsx_x<VSv>r<VSs>ic"
       ;; Round-to-integer instruction with the "ic" mnemonic suffix,
       ;; represented as UNSPEC_VSX_ROUND_IC; otherwise identical in shape
       ;; to vsx_x<VSv>r<VSs>i.
1243   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1244         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1245                       UNSPEC_VSX_ROUND_IC))]
1246   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1247   "x<VSv>r<VSs>ic %x0,%x1"
1248   [(set_attr "type" "<VStype_simple>")
1249    (set_attr "fp_type" "<VSfptype_simple>")])
1250
1251 (define_insn "vsx_btrunc<mode>2"
       ;; Vector truncate toward zero keeping a floating-point result:
       ;; matches (fix op1) where source and result share the same VSX_F
       ;; mode, and emits xvr<VSs>iz.
1252   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1253         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1254   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1255   "xvr<VSs>iz %x0,%x1"
1256   [(set_attr "type" "<VStype_simple>")
1257    (set_attr "fp_type" "<VSfptype_simple>")])
1258
1259 (define_insn "*vsx_b2trunc<mode>2"
       ;; UNSPEC_FRIZ form of the "iz" rounding instruction.  Unlike
       ;; vsx_btrunc<mode>2 this iterates over VSX_B, so it also covers
       ;; scalar DF, and the mnemonic takes its scalar/vector letter from
       ;; <VSv>.
1260   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1261         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1262                       UNSPEC_FRIZ))]
1263   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1264   "x<VSv>r<VSs>iz %x0,%x1"
1265   [(set_attr "type" "<VStype_simple>")
1266    (set_attr "fp_type" "<VSfptype_simple>")])
1267
1268 (define_insn "vsx_floor<mode>2"
       ;; Vector floor: represented as UNSPEC_FRIM of operand 1 in a VSX_F
       ;; mode and emitted as xvr<VSs>im.
1269   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1270         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1271                       UNSPEC_FRIM))]
1272   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1273   "xvr<VSs>im %x0,%x1"
1274   [(set_attr "type" "<VStype_simple>")
1275    (set_attr "fp_type" "<VSfptype_simple>")])
1276
1277 (define_insn "vsx_ceil<mode>2"
       ;; Vector ceiling: represented as UNSPEC_FRIP of operand 1 in a VSX_F
       ;; mode and emitted as xvr<VSs>ip.
1278   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1279         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1280                       UNSPEC_FRIP))]
1281   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1282   "xvr<VSs>ip %x0,%x1"
1283   [(set_attr "type" "<VStype_simple>")
1284    (set_attr "fp_type" "<VSfptype_simple>")])
1285
1286 \f
1287 ;; VSX convert to/from double vector
1288
1289 ;; Convert between single and double precision
1290 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1291 ;; scalar single precision instructions internally use the double format.
1292 ;; Prefer the altivec registers, since we likely will need to do a vperm
1293 (define_insn "vsx_<VS_spdp_insn>"
       ;; Single <-> double precision conversion, one pattern per mode of
       ;; the VSX_SPDP iterator.  The pattern name, the mnemonic
       ;; (<VS_spdp_insn>), the result mode (<VS_spdp_res>) and the "type"
       ;; attribute are all taken from mode attributes of the source mode.
1294   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1295         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1296                               UNSPEC_VSX_CVSPDP))]
1297   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1298   "<VS_spdp_insn> %x0,%x1"
1299   [(set_attr "type" "<VS_spdp_type>")])
1300
1301 ;; xscvspdp, represent the scalar SF type as V4SF
     ;; The result is a DF value in a `ws' register; the source is a V4SF
     ;; register in class `wa'.  Gated on VSX support for V4SFmode.
1302 (define_insn "vsx_xscvspdp"
1303   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1304         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1305                    UNSPEC_VSX_CVSPDP))]
1306   "VECTOR_UNIT_VSX_P (V4SFmode)"
1307   "xscvspdp %x0,%x1"
1308   [(set_attr "type" "fp")])
1309
1310 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1311 ;; format of scalars is actually DF.
     ;; The source is an SF value in an `f' register and the result is a V4SF
     ;; register in class `wa'.  Note the pattern shares UNSPEC_VSX_CVSPDP
     ;; with the opposite-direction conversions; the operand modes tell them
     ;; apart.
1312 (define_insn "vsx_xscvdpsp_scalar"
1313   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1314         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1315                      UNSPEC_VSX_CVSPDP))]
1316   "VECTOR_UNIT_VSX_P (V4SFmode)"
1317   "xscvdpsp %x0,%x1"
1318   [(set_attr "type" "fp")])
1319
1320 ;; Same as vsx_xscvspdp, but use SF as the type
     ;; The emitted instruction is the same xscvspdp; only the RTL result
     ;; mode (SF in an `f' register instead of DF in `ws') differs.
1321 (define_insn "vsx_xscvspdp_scalar2"
1322   [(set (match_operand:SF 0 "vsx_register_operand" "=f")
1323         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1324                    UNSPEC_VSX_CVSPDP))]
1325   "VECTOR_UNIT_VSX_P (V4SFmode)"
1326   "xscvspdp %x0,%x1"
1327   [(set_attr "type" "fp")])
1328
1329 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
     ;; This pattern is the DF -> V4SF direction (xscvdpspn), enabled by
     ;; TARGET_XSCVDPSPN rather than a VECTOR_UNIT_VSX_P test.
1330 (define_insn "vsx_xscvdpspn"
1331   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
1332         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1333                      UNSPEC_VSX_CVDPSPN))]
1334   "TARGET_XSCVDPSPN"
1335   "xscvdpspn %x0,%x1"
1336   [(set_attr "type" "fp")])
1337
1338 (define_insn "vsx_xscvspdpn"
       ;; V4SF -> DF direction of the xscv*n conversion pair (xscvspdpn),
       ;; represented as UNSPEC_VSX_CVSPDPN and enabled by TARGET_XSCVSPDPN.
1339   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
1340         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
1341                    UNSPEC_VSX_CVSPDPN))]
1342   "TARGET_XSCVSPDPN"
1343   "xscvspdpn %x0,%x1"
1344   [(set_attr "type" "fp")])
1345
1346 (define_insn "vsx_xscvdpspn_scalar"
       ;; Variant of vsx_xscvdpspn whose source is typed SF (in a `ww'
       ;; register) instead of DF; it emits the same xscvdpspn instruction
       ;; and produces a V4SF result.
1347   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
1348         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
1349                      UNSPEC_VSX_CVDPSPN))]
1350   "TARGET_XSCVDPSPN"
1351   "xscvdpspn %x0,%x1"
1352   [(set_attr "type" "fp")])
1353
1354 ;; Used by direct move to move a SFmode value from GPR to VSX register
     ;; The source operand is typed DI but constrained to a VSX register
     ;; (`wa'); xscvspdpn converts it in place to the SF result.
1355 (define_insn "vsx_xscvspdpn_directmove"
1356   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1357         (unspec:SF [(match_operand:DI 1 "vsx_register_operand" "wa")]
1358                    UNSPEC_VSX_CVSPDPN))]
1359   "TARGET_XSCVSPDPN"
1360   "xscvspdpn %x0,%x1"
1361   [(set_attr "type" "fp")])
1362
1363 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
1364
1365 (define_expand "vsx_xvcvsxddp_scale"
       ;; Signed V2DI -> V2DF conversion followed by scaling.  Operand 2 is
       ;; an immediate scale; when it is non-zero the converted result is
       ;; rescaled in place by rs6000_scale_v2df with the NEGATED scale.
       ;; The expander emits everything itself and finishes with DONE.
1366   [(match_operand:V2DF 0 "vsx_register_operand" "")
1367    (match_operand:V2DI 1 "vsx_register_operand" "")
1368    (match_operand:QI 2 "immediate_operand" "")]
1369   "VECTOR_UNIT_VSX_P (V2DFmode)"
1370 {
1371   rtx op0 = operands[0];
1372   rtx op1 = operands[1];
1373   int scale = INTVAL(operands[2]);
1374   emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1375   if (scale != 0)
1376     rs6000_scale_v2df (op0, op0, -scale);
1377   DONE;
1378 })
1379
1380 (define_insn "vsx_xvcvsxddp"
       ;; The bare xvcvsxddp instruction (V2DI source, V2DF result) used by
       ;; vsx_xvcvsxddp_scale; kept as an unspec (UNSPEC_VSX_XVCVSXDDP).
1381   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1382         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1383                      UNSPEC_VSX_XVCVSXDDP))]
1384   "VECTOR_UNIT_VSX_P (V2DFmode)"
1385   "xvcvsxddp %x0,%x1"
1386   [(set_attr "type" "vecdouble")])
1387
1388 (define_expand "vsx_xvcvuxddp_scale"
       ;; Unsigned V2DI -> V2DF conversion followed by scaling.  Operand 2 is
       ;; an immediate scale; when it is non-zero the converted result is
       ;; rescaled in place by rs6000_scale_v2df with the NEGATED scale.
       ;; The expander emits everything itself and finishes with DONE.
1389   [(match_operand:V2DF 0 "vsx_register_operand" "")
1390    (match_operand:V2DI 1 "vsx_register_operand" "")
1391    (match_operand:QI 2 "immediate_operand" "")]
1392   "VECTOR_UNIT_VSX_P (V2DFmode)"
1393 {
1394   rtx op0 = operands[0];
1395   rtx op1 = operands[1];
1396   int scale = INTVAL(operands[2]);
1397   emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1398   if (scale != 0)
1399     rs6000_scale_v2df (op0, op0, -scale);
1400   DONE;
1401 })
1402
1403 (define_insn "vsx_xvcvuxddp"
       ;; The bare xvcvuxddp instruction (V2DI source, V2DF result) used by
       ;; vsx_xvcvuxddp_scale; kept as an unspec (UNSPEC_VSX_XVCVUXDDP).
1404   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1405         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1406                      UNSPEC_VSX_XVCVUXDDP))]
1407   "VECTOR_UNIT_VSX_P (V2DFmode)"
1408   "xvcvuxddp %x0,%x1"
1409   [(set_attr "type" "vecdouble")])
1410
1411 (define_expand "vsx_xvcvdpsxds_scale"
       ;; Scale a V2DF value and convert it to signed V2DI.  Operand 2 is an
       ;; immediate scale.  When it is non-zero, operand 1 is first scaled
       ;; into a fresh V2DF pseudo by rs6000_scale_v2df and that pseudo is
       ;; converted; when it is zero, operand 1 is converted directly.
       ;; The expander emits everything itself and finishes with DONE.
1412   [(match_operand:V2DI 0 "vsx_register_operand" "")
1413    (match_operand:V2DF 1 "vsx_register_operand" "")
1414    (match_operand:QI 2 "immediate_operand" "")]
1415   "VECTOR_UNIT_VSX_P (V2DFmode)"
1416 {
1417   rtx op0 = operands[0];
1418   rtx op1 = operands[1];
1419   int scale = INTVAL (operands[2]);
       /* Only allocate a temporary when it is actually written: with a zero
          scale nothing would initialize it, so convert operand 1 as is.  */
1420   rtx tmp = scale != 0 ? gen_reg_rtx (V2DFmode) : op1;
1421   if (scale != 0)
1422     rs6000_scale_v2df (tmp, op1, scale);
1423   emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
1424   DONE;
1425 })
1426
1427 (define_insn "vsx_xvcvdpsxds"
       ;; The bare xvcvdpsxds instruction (V2DF source, V2DI result) used by
       ;; vsx_xvcvdpsxds_scale; kept as an unspec (UNSPEC_VSX_XVCVDPSXDS).
1428   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1429         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1430                      UNSPEC_VSX_XVCVDPSXDS))]
1431   "VECTOR_UNIT_VSX_P (V2DFmode)"
1432   "xvcvdpsxds %x0,%x1"
1433   [(set_attr "type" "vecdouble")])
1434
1435 (define_expand "vsx_xvcvdpuxds_scale"
       ;; Scale a V2DF value and convert it to unsigned V2DI.  Operand 2 is
       ;; an immediate scale.  When it is non-zero, operand 1 is first scaled
       ;; into a fresh V2DF pseudo by rs6000_scale_v2df and that pseudo is
       ;; converted; when it is zero, operand 1 is converted directly.
       ;; The expander emits everything itself and finishes with DONE.
1436   [(match_operand:V2DI 0 "vsx_register_operand" "")
1437    (match_operand:V2DF 1 "vsx_register_operand" "")
1438    (match_operand:QI 2 "immediate_operand" "")]
1439   "VECTOR_UNIT_VSX_P (V2DFmode)"
1440 {
1441   rtx op0 = operands[0];
1442   rtx op1 = operands[1];
1443   int scale = INTVAL (operands[2]);
       /* Only allocate a temporary when it is actually written: with a zero
          scale nothing would initialize it, so convert operand 1 as is.  */
1444   rtx tmp = scale != 0 ? gen_reg_rtx (V2DFmode) : op1;
1445   if (scale != 0)
1446     rs6000_scale_v2df (tmp, op1, scale);
1447   emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
1448   DONE;
1449 })
1450
1451 (define_insn "vsx_xvcvdpuxds"
       ;; The bare xvcvdpuxds instruction (V2DF source, V2DI result) used by
       ;; vsx_xvcvdpuxds_scale; kept as an unspec (UNSPEC_VSX_XVCVDPUXDS).
1452   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1453         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1454                      UNSPEC_VSX_XVCVDPUXDS))]
1455   "VECTOR_UNIT_VSX_P (V2DFmode)"
1456   "xvcvdpuxds %x0,%x1"
1457   [(set_attr "type" "vecdouble")])
1458
1459 ;; Convert from 64-bit to 32-bit types
1460 ;; Note, favor the Altivec registers since the usual use of these instructions
1461 ;; is in vector converts and we need to use the Altivec vperm instruction.
1462
1463 (define_insn "vsx_xvcvdpsxws"
       ;; V2DF -> V4SI conversion emitted as xvcvdpsxws, kept as an unspec
       ;; (UNSPEC_VSX_CVDPSXWS).  The first (preferred) alternative puts the
       ;; result in an Altivec `v' register.
1464   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1465         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1466                      UNSPEC_VSX_CVDPSXWS))]
1467   "VECTOR_UNIT_VSX_P (V2DFmode)"
1468   "xvcvdpsxws %x0,%x1"
1469   [(set_attr "type" "vecdouble")])
1470
1471 (define_insn "vsx_xvcvdpuxws"
       ;; V2DF -> V4SI conversion emitted as xvcvdpuxws (the unsigned
       ;; counterpart of vsx_xvcvdpsxws), kept as an unspec
       ;; (UNSPEC_VSX_CVDPUXWS).  The first alternative prefers a `v'
       ;; destination register.
1472   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1473         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1474                      UNSPEC_VSX_CVDPUXWS))]
1475   "VECTOR_UNIT_VSX_P (V2DFmode)"
1476   "xvcvdpuxws %x0,%x1"
1477   [(set_attr "type" "vecdouble")])
1478
1479 (define_insn "vsx_xvcvsxdsp"
       ;; Emits xvcvsxdsp, kept as an unspec (UNSPEC_VSX_CVSXDSP).
       ;; NOTE(review): the RTL declares a V2DF source and a V4SI result,
       ;; while the mnemonic suggests an integer-doubleword source and a
       ;; single-precision result -- confirm the intended modes against the
       ;; ISA and the builtin that uses this pattern before changing them.
1480   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1481         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1482                      UNSPEC_VSX_CVSXDSP))]
1483   "VECTOR_UNIT_VSX_P (V2DFmode)"
1484   "xvcvsxdsp %x0,%x1"
1485   [(set_attr "type" "vecfloat")])
1486
1487 (define_insn "vsx_xvcvuxdsp"
       ;; Emits xvcvuxdsp, the unsigned counterpart of vsx_xvcvsxdsp, kept as
       ;; an unspec (UNSPEC_VSX_CVUXDSP).  The output template must name
       ;; xvcvuxdsp; xvcvuxwdp is a different conversion that has its own
       ;; pattern (vsx_xvcvuxwdp).
       ;; NOTE(review): as in vsx_xvcvsxdsp, the RTL declares a V2DF source
       ;; and a V4SI result, which does not obviously match the mnemonic --
       ;; confirm the intended modes against the ISA and the builtin table.
1488   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1489         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1490                      UNSPEC_VSX_CVUXDSP))]
1491   "VECTOR_UNIT_VSX_P (V2DFmode)"
1492   "xvcvuxdsp %x0,%x1"
1493   [(set_attr "type" "vecdouble")])
1494
1495 ;; Convert from 32-bit to 64-bit types
1496 (define_insn "vsx_xvcvsxwdp"
       ;; V4SI -> V2DF conversion emitted as xvcvsxwdp, kept as an unspec
       ;; (UNSPEC_VSX_CVSXWDP).
1497   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1498         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1499                      UNSPEC_VSX_CVSXWDP))]
1500   "VECTOR_UNIT_VSX_P (V2DFmode)"
1501   "xvcvsxwdp %x0,%x1"
1502   [(set_attr "type" "vecdouble")])
1503
1504 (define_insn "vsx_xvcvuxwdp"
       ;; V4SI -> V2DF conversion emitted as xvcvuxwdp (the unsigned
       ;; counterpart of vsx_xvcvsxwdp), kept as an unspec
       ;; (UNSPEC_VSX_CVUXWDP).
1505   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1506         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1507                      UNSPEC_VSX_CVUXWDP))]
1508   "VECTOR_UNIT_VSX_P (V2DFmode)"
1509   "xvcvuxwdp %x0,%x1"
1510   [(set_attr "type" "vecdouble")])
1511
     ;; V4SF (operand 1) to V2DI (operand 0) conversion emitted as xvcvspsxds,
     ;; kept opaque as UNSPEC_VSX_CVSPSXDS.
1512 (define_insn "vsx_xvcvspsxds"
1513   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1514         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1515                      UNSPEC_VSX_CVSPSXDS))]
1516   "VECTOR_UNIT_VSX_P (V2DFmode)"
1517   "xvcvspsxds %x0,%x1"
1518   [(set_attr "type" "vecdouble")])
1519
     ;; V4SF (operand 1) to V2DI (operand 0) conversion emitted as xvcvspuxds,
     ;; kept opaque as UNSPEC_VSX_CVSPUXDS.
1520 (define_insn "vsx_xvcvspuxds"
1521   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1522         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1523                      UNSPEC_VSX_CVSPUXDS))]
1524   "VECTOR_UNIT_VSX_P (V2DFmode)"
1525   "xvcvspuxds %x0,%x1"
1526   [(set_attr "type" "vecdouble")])
1527
1528 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode,
1529 ;; since the xsrdpiz instruction does not truncate the value if the floating
1530 ;; point value is < LONG_MIN or > LONG_MAX.
     ;; The insn condition therefore requires flag_unsafe_math_optimizations,
     ;; !flag_trapping_math and TARGET_FRIZ in addition to VSX support for the
     ;; mode.  The mnemonic is assembled from the <VSv> and <VSs> attributes.
1531 (define_insn "*vsx_float_fix_<mode>2"
1532   [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1533         (float:VSX_DF
1534          (fix:<VSI>
1535           (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?<VSa>"))))]
1536   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1537    && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations
1538    && !flag_trapping_math && TARGET_FRIZ"
1539   "x<VSv>r<VSs>iz %x0,%x1"
1540   [(set_attr "type" "<VStype_simple>")
1541    (set_attr "fp_type" "<VSfptype_simple>")])
1542
1543 \f
1544 ;; Permute operations
1545
1546 ;; Build a V2DF/V2DI vector from two scalars
     ;; Operands 1 and 2 become the two elements of the result.  A single
     ;; xxpermdi with selector 0 is used; on little endian targets the two
     ;; source registers are passed in the opposite order.
1547 (define_insn "vsx_concat_<mode>"
1548   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1549         (vec_concat:VSX_D
1550          (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
1551          (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
1552   "VECTOR_MEM_VSX_P (<MODE>mode)"
1553 {
1554   /* Only the order of the two source registers depends on endianness.  */
1555   return (BYTES_BIG_ENDIAN
1556           ? "xxpermdi %x0,%x1,%x2,0"
1557           : "xxpermdi %x0,%x2,%x1,0");
1558 }
1559   [(set_attr "type" "vecperm")])
1560
1561 ;; Special purpose concat using xxpermdi to glue two single precision values
1562 ;; together, relying on the fact that internally scalar floats are represented
1563 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
     ;; The result is typed V2DF and wrapped in UNSPEC_VSX_CONCAT.
1564 (define_insn "vsx_concat_v2sf"
1565   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1566         (unspec:V2DF
1567          [(match_operand:SF 1 "vsx_register_operand" "f,f")
1568           (match_operand:SF 2 "vsx_register_operand" "f,f")]
1569          UNSPEC_VSX_CONCAT))]
1570   "VECTOR_MEM_VSX_P (V2DFmode)"
1571 {
1572   /* Only the order of the two source registers depends on endianness.  */
1573   return (BYTES_BIG_ENDIAN
1574           ? "xxpermdi %x0,%x1,%x2,0"
1575           : "xxpermdi %x0,%x2,%x1,0");
1576 }
1577   [(set_attr "type" "vecperm")])
1578
1579 ;; xxpermdi for little endian loads and stores.  We need several of
1580 ;; these since the form of the PARALLEL differs by mode.
     ;; Two-element form (VSX_LE modes): register-to-register selection of
     ;; elements 1,0 of operand 1.  Only enabled when !BYTES_BIG_ENDIAN.
1581 (define_insn "*vsx_xxpermdi2_le_<mode>"
1582   [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1583         (vec_select:VSX_LE
1584           (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1585           (parallel [(const_int 1) (const_int 0)])))]
1586   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1587   "xxpermdi %x0,%x1,%x1,2"
1588   [(set_attr "type" "vecperm")])
1589
     ;; Four-element form (VSX_W modes): selects elements 2,3,0,1 of operand 1,
     ;; i.e. the two halves exchanged.  Only enabled when !BYTES_BIG_ENDIAN.
1590 (define_insn "*vsx_xxpermdi4_le_<mode>"
1591   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1592         (vec_select:VSX_W
1593           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1594           (parallel [(const_int 2) (const_int 3)
1595                      (const_int 0) (const_int 1)])))]
1596   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1597   "xxpermdi %x0,%x1,%x1,2"
1598   [(set_attr "type" "vecperm")])
1599
     ;; Eight-element form (V8HI): selects elements 4..7 followed by 0..3 of
     ;; operand 1, i.e. the two halves exchanged.  Only enabled when
     ;; !BYTES_BIG_ENDIAN.
1600 (define_insn "*vsx_xxpermdi8_le_V8HI"
1601   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1602         (vec_select:V8HI
1603           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1604           (parallel [(const_int 4) (const_int 5)
1605                      (const_int 6) (const_int 7)
1606                      (const_int 0) (const_int 1)
1607                      (const_int 2) (const_int 3)])))]
1608   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1609   "xxpermdi %x0,%x1,%x1,2"
1610   [(set_attr "type" "vecperm")])
1611
     ;; Sixteen-element form (V16QI): selects elements 8..15 followed by 0..7
     ;; of operand 1, i.e. the two halves exchanged.  Only enabled when
     ;; !BYTES_BIG_ENDIAN.
1612 (define_insn "*vsx_xxpermdi16_le_V16QI"
1613   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1614         (vec_select:V16QI
1615           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1616           (parallel [(const_int 8) (const_int 9)
1617                      (const_int 10) (const_int 11)
1618                      (const_int 12) (const_int 13)
1619                      (const_int 14) (const_int 15)
1620                      (const_int 0) (const_int 1)
1621                      (const_int 2) (const_int 3)
1622                      (const_int 4) (const_int 5)
1623                      (const_int 6) (const_int 7)])))]
1624   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1625   "xxpermdi %x0,%x1,%x1,2"
1626   [(set_attr "type" "vecperm")])
1627
1628 ;; lxvd2x for little endian loads.  We need several of
1629 ;; these since the form of the PARALLEL differs by mode.
     ;; Two-element form (VSX_LE modes): the load is described as selecting
     ;; elements 1,0 of the memory operand.  Only enabled when
     ;; !BYTES_BIG_ENDIAN.
1630 (define_insn "*vsx_lxvd2x2_le_<mode>"
1631   [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1632         (vec_select:VSX_LE
1633           (match_operand:VSX_LE 1 "memory_operand" "Z")
1634           (parallel [(const_int 1) (const_int 0)])))]
1635   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1636   "lxvd2x %x0,%y1"
1637   [(set_attr "type" "vecload")])
1638
     ;; Four-element form (VSX_W modes): the load is described as selecting
     ;; elements 2,3,0,1 of the memory operand.  Only enabled when
     ;; !BYTES_BIG_ENDIAN.
1639 (define_insn "*vsx_lxvd2x4_le_<mode>"
1640   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1641         (vec_select:VSX_W
1642           (match_operand:VSX_W 1 "memory_operand" "Z")
1643           (parallel [(const_int 2) (const_int 3)
1644                      (const_int 0) (const_int 1)])))]
1645   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1646   "lxvd2x %x0,%y1"
1647   [(set_attr "type" "vecload")])
1648
     ;; Eight-element form (V8HI): the load is described as selecting elements
     ;; 4..7 followed by 0..3 of the memory operand.  Only enabled when
     ;; !BYTES_BIG_ENDIAN.
1649 (define_insn "*vsx_lxvd2x8_le_V8HI"
1650   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1651         (vec_select:V8HI
1652           (match_operand:V8HI 1 "memory_operand" "Z")
1653           (parallel [(const_int 4) (const_int 5)
1654                      (const_int 6) (const_int 7)
1655                      (const_int 0) (const_int 1)
1656                      (const_int 2) (const_int 3)])))]
1657   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1658   "lxvd2x %x0,%y1"
1659   [(set_attr "type" "vecload")])
1660
     ;; Sixteen-element form (V16QI): the load is described as selecting
     ;; elements 8..15 followed by 0..7 of the memory operand.  Only enabled
     ;; when !BYTES_BIG_ENDIAN.
1661 (define_insn "*vsx_lxvd2x16_le_V16QI"
1662   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1663         (vec_select:V16QI
1664           (match_operand:V16QI 1 "memory_operand" "Z")
1665           (parallel [(const_int 8) (const_int 9)
1666                      (const_int 10) (const_int 11)
1667                      (const_int 12) (const_int 13)
1668                      (const_int 14) (const_int 15)
1669                      (const_int 0) (const_int 1)
1670                      (const_int 2) (const_int 3)
1671                      (const_int 4) (const_int 5)
1672                      (const_int 6) (const_int 7)])))]
1673   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1674   "lxvd2x %x0,%y1"
1675   [(set_attr "type" "vecload")])
1676
1677 ;; stxvd2x for little endian stores.  We need several of
1678 ;; these since the form of the PARALLEL differs by mode.
     ;; Two-element form (VSX_LE modes): the store is described as writing
     ;; elements 1,0 of register operand 1 to memory operand 0.  Only enabled
     ;; when !BYTES_BIG_ENDIAN.
1679 (define_insn "*vsx_stxvd2x2_le_<mode>"
1680   [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
1681         (vec_select:VSX_LE
1682           (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1683           (parallel [(const_int 1) (const_int 0)])))]
1684   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1685   "stxvd2x %x1,%y0"
1686   [(set_attr "type" "vecstore")])
1687
     ;; Four-element form (VSX_W modes): the store is described as writing
     ;; elements 2,3,0,1 of register operand 1 to memory operand 0.  Only
     ;; enabled when !BYTES_BIG_ENDIAN.
1688 (define_insn "*vsx_stxvd2x4_le_<mode>"
1689   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
1690         (vec_select:VSX_W
1691           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1692           (parallel [(const_int 2) (const_int 3)
1693                      (const_int 0) (const_int 1)])))]
1694   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1695   "stxvd2x %x1,%y0"
1696   [(set_attr "type" "vecstore")])
1697
     ;; Eight-element form (V8HI): the store is described as writing elements
     ;; 4..7 followed by 0..3 of register operand 1 to memory operand 0.  Only
     ;; enabled when !BYTES_BIG_ENDIAN.
1698 (define_insn "*vsx_stxvd2x8_le_V8HI"
1699   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1700         (vec_select:V8HI
1701           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1702           (parallel [(const_int 4) (const_int 5)
1703                      (const_int 6) (const_int 7)
1704                      (const_int 0) (const_int 1)
1705                      (const_int 2) (const_int 3)])))]
1706   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1707   "stxvd2x %x1,%y0"
1708   [(set_attr "type" "vecstore")])
1709
     ;; Sixteen-element form (V16QI): the store is described as writing
     ;; elements 8..15 followed by 0..7 of register operand 1 to memory
     ;; operand 0.  Only enabled when !BYTES_BIG_ENDIAN.
1710 (define_insn "*vsx_stxvd2x16_le_V16QI"
1711   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1712         (vec_select:V16QI
1713           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1714           (parallel [(const_int 8) (const_int 9)
1715                      (const_int 10) (const_int 11)
1716                      (const_int 12) (const_int 13)
1717                      (const_int 14) (const_int 15)
1718                      (const_int 0) (const_int 1)
1719                      (const_int 2) (const_int 3)
1720                      (const_int 4) (const_int 5)
1721                      (const_int 6) (const_int 7)])))]
1722   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1723   "stxvd2x %x1,%y0"
1724   [(set_attr "type" "vecstore")])
1725
1726 ;; Convert a TImode value into V1TImode
     ;; Operand 0 is the V1TI result, operand 1 the incoming V1TI vector,
     ;; operand 2 the TImode value to insert and operand 3 the element index.
     ;; Any index other than 0 is treated as unreachable.  The result is
     ;; operand 2 viewed as V1TImode; operand 1 is not used, since the only
     ;; index accepted replaces the whole vector.
1727 (define_expand "vsx_set_v1ti"
1728   [(match_operand:V1TI 0 "nonimmediate_operand" "")
1729    (match_operand:V1TI 1 "nonimmediate_operand" "")
1730    (match_operand:TI 2 "input_operand" "")
1731    (match_operand:QI 3 "u5bit_cint_operand" "")]
1732   "VECTOR_MEM_VSX_P (V1TImode)"
1733 {
1734   if (operands[3] != const0_rtx)
1735     gcc_unreachable ();
1736
1737   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
1738   DONE;
1739 })
1740
1741 ;; Set the element of a V2DI/V2DF mode
     ;; Operand 1 is the vector being updated, operand 2 the new scalar and
     ;; operand 3 the element index (0 or 1).  Which xxpermdi form is printed
     ;; depends on whether the index equals BYTES_BIG_ENDIAN ? 0 : 1.
1742 (define_insn "vsx_set_<mode>"
1743   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
1744         (unspec:VSX_D
1745          [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
1746           (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
1747           (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
1748          UNSPEC_VSX_SET))]
1749   "VECTOR_MEM_VSX_P (<MODE>mode)"
1750 {
1751   HOST_WIDE_INT elt = INTVAL (operands[3]);
1752   HOST_WIDE_INT first = BYTES_BIG_ENDIAN ? 0 : 1;
1753   if (elt == first)
1754     return "xxpermdi %x0,%x2,%x1,1";
1755   if (elt == 1 - first)
1756     return "xxpermdi %x0,%x1,%x2,0";
1757   gcc_unreachable ();
1758 }
1759   [(set_attr "type" "vecperm")])
1760
1761 ;; Extract a DF/DI element from V2DF/V2DI
     ;; Expander with an empty preparation statement: it only generates the
     ;; vec_select RTL, which is then matched by the *vsx_extract_<mode>_*
     ;; insns in this file.  Operand 2 is the constant element index.
1762 (define_expand "vsx_extract_<mode>"
1763   [(set (match_operand:<VS_scalar> 0 "register_operand" "")
1764         (vec_select:<VS_scalar> (match_operand:VSX_D 1 "register_operand" "")
1765                        (parallel
1766                         [(match_operand:QI 2 "u5bit_cint_operand" "")])))]
1767   "VECTOR_MEM_VSX_P (<MODE>mode)"
1768   "")
1769
1770 ;; Optimize cases where we can do a simple or direct move.
1771 ;; Or see if we can avoid doing the move at all
     ;; Requires TARGET_POWERPC64 and TARGET_DIRECT_MOVE; the element index
     ;; must satisfy the vsx_scalar_64bit predicate.
1772 (define_insn "*vsx_extract_<mode>_internal1"
1773   [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,<VS_64reg>,r")
1774         (vec_select:<VS_scalar>
1775          (match_operand:VSX_D 1 "register_operand" "d,<VS_64reg>,<VS_64dm>")
1776          (parallel
1777           [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
1778   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
1779 {
1780   int dest = REGNO (operands[0]);
1781   int src = REGNO (operands[1]);
1782   /* Source and destination are the same register: nothing to move.  */
1783   if (dest == src)
1784     return "nop";
1785   /* Integer register destination.  */
1786   else if (INT_REGNO_P (dest))
1787     return "mfvsrd %0,%x1";
1788   /* Both registers satisfy FP_REGNO_P.  */
1789   else if (FP_REGNO_P (dest) && FP_REGNO_P (src))
1790     return "fmr %0,%1";
1791   else
1792     return "xxlor %x0,%x1,%x1";
1793 }
1794   [(set_attr "type" "fp,vecsimple,mftgpr")
1795    (set_attr "length" "4")])
1796
     ;; Register-to-register extract used when !TARGET_POWERPC64,
     ;; !TARGET_DIRECT_MOVE, or the requested element is not
     ;; VECTOR_ELEMENT_SCALAR_64BIT.  If the element is
     ;; VECTOR_ELEMENT_SCALAR_64BIT the output is a nop, fmr or xxlor
     ;; depending on the registers involved.  Otherwise an xxpermdi is
     ;; printed whose selector is index << 1, replaced by 3 minus that value
     ;; on little endian; the selector is stored in operands[3] for printing.
1797 (define_insn "*vsx_extract_<mode>_internal2"
1798   [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d,<VS_64reg>,<VS_64reg>")
1799         (vec_select:<VS_scalar>
1800          (match_operand:VSX_D 1 "vsx_register_operand" "d,wd,wd")
1801          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "wD,wD,i")])))]
1802   "VECTOR_MEM_VSX_P (<MODE>mode)
1803    && (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE
1804        || INTVAL (operands[2]) != VECTOR_ELEMENT_SCALAR_64BIT)"
1805 {
1806   int fldDM;
1807   gcc_assert (UINTVAL (operands[2]) <= 1);
1808
1809   if (INTVAL (operands[2]) == VECTOR_ELEMENT_SCALAR_64BIT)
1810     {
1811       int op0_regno = REGNO (operands[0]);
1812       int op1_regno = REGNO (operands[1]);
1813
1814       if (op0_regno == op1_regno)
1815         return "nop";
1816
1817       if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
1818         return "fmr %0,%1";
1819
1820       return "xxlor %x0,%x1,%x1";
1821     }
1822
1823   fldDM = INTVAL (operands[2]) << 1;
1824   if (!BYTES_BIG_ENDIAN)
1825     fldDM = 3 - fldDM;
1826   operands[3] = GEN_INT (fldDM);
1827   return "xxpermdi %x0,%x1,%x1,%3";
1828 }
1829   [(set_attr "type" "fp,vecsimple,vecperm")
1830    (set_attr "length" "4")])
1831
1832 ;; Optimize extracting a single scalar element from memory if the scalar is in
1833 ;; the correct location to use a single load.
     ;; Only element 0 of the memory operand is matched.  The three
     ;; alternatives print lfd, lxsd..x and ld respectively.
1834 (define_insn "*vsx_extract_<mode>_load"
1835   [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
1836         (vec_select:<VS_scalar>
1837          (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
1838          (parallel [(const_int 0)])))]
1839   "VECTOR_MEM_VSX_P (<MODE>mode)"
1840   "@
1841    lfd%U1%X1 %0,%1
1842    lxsd%U1x %x0,%y1
1843    ld%U1%X1 %0,%1"
1844   [(set_attr "type" "fpload,fpload,load")
1845    (set_attr "length" "4")])
1846
1847 ;; Optimize storing a single scalar element that is the right location to
1848 ;; memory
     ;; The element index must satisfy the vsx_scalar_64bit predicate.  The
     ;; first alternative prints stfd, the other two stxsd..x.
1849 (define_insn "*vsx_extract_<mode>_store"
1850   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
1851         (vec_select:<VS_scalar>
1852          (match_operand:VSX_D 1 "register_operand" "d,wd,<VSa>")
1853          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
1854   "VECTOR_MEM_VSX_P (<MODE>mode)"
1855   "@
1856    stfd%U0%X0 %1,%0
1857    stxsd%U0x %x1,%y0
1858    stxsd%U0x %x1,%y0"
1859   [(set_attr "type" "fpstore")
1860    (set_attr "length" "4")])
1861
1862 ;; Extract a SF element from V4SF
     ;; The first alternative (index constraint "O") is a single xscvspdp.
     ;; The second alternative is split: the element number is the index on
     ;; big endian and 3 - index on little endian; when it is nonzero the
     ;; vector is first shifted with xxsldwi into scratch operand 3 (or a new
     ;; pseudo if that is still a SCRATCH), then converted with
     ;; vsx_xscvspdp_scalar2.
1863 (define_insn_and_split "vsx_extract_v4sf"
1864   [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
1865         (vec_select:SF
1866          (match_operand:V4SF 1 "vsx_register_operand" "<VSa>,<VSa>")
1867          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
1868    (clobber (match_scratch:V4SF 3 "=X,0"))]
1869   "VECTOR_UNIT_VSX_P (V4SFmode)"
1870   "@
1871    xscvspdp %x0,%x1
1872    #"
1873   ""
1874   [(const_int 0)]
1875   "
1876 {
1877   rtx op0 = operands[0];
1878   rtx op1 = operands[1];
1879   rtx op2 = operands[2];
1880   rtx op3 = operands[3];
1881   rtx tmp;
1882   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
1883
1884   if (ele == 0)
1885     tmp = op1;
1886   else
1887     {
1888       if (GET_CODE (op3) == SCRATCH)
1889         op3 = gen_reg_rtx (V4SFmode);
1890       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
1891       tmp = op3;
1892     }
1893   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
1894   DONE;
1895 }"
1896   [(set_attr "length" "4,8")
1897    (set_attr "type" "fp")])
1898
1899 ;; Expand the builtin form of xxpermdi to canonical rtl.
     ;; Operand 3 is the 2-bit mask: bit 1 becomes the first selector
     ;; (0 or 1) and bit 0 the second selector (2 or 3).  V2DF uses
     ;; vsx_xxpermdi2_v2df_1; every other mode goes through
     ;; vsx_xxpermdi2_v2di_1, with the operands viewed as V2DI when needed.
1900 (define_expand "vsx_xxpermdi_<mode>"
1901   [(match_operand:VSX_L 0 "vsx_register_operand" "")
1902    (match_operand:VSX_L 1 "vsx_register_operand" "")
1903    (match_operand:VSX_L 2 "vsx_register_operand" "")
1904    (match_operand:QI 3 "u5bit_cint_operand" "")]
1905   "VECTOR_MEM_VSX_P (<MODE>mode)"
1906 {
1907   rtx target = operands[0];
1908   rtx op0 = operands[1];
1909   rtx op1 = operands[2];
1910   int mask = INTVAL (operands[3]);
1911   rtx perm0 = GEN_INT ((mask >> 1) & 1);
1912   rtx perm1 = GEN_INT ((mask & 1) + 2);
1913   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
1914
1915   if (<MODE>mode == V2DFmode)
1916     gen = gen_vsx_xxpermdi2_v2df_1;
1917   else
1918     {
1919       gen = gen_vsx_xxpermdi2_v2di_1;
1920       if (<MODE>mode != V2DImode)
1921         {
1922           target = gen_lowpart (V2DImode, target);
1923           op0 = gen_lowpart (V2DImode, op0);
1924           op1 = gen_lowpart (V2DImode, op1);
1925         }
1926     }
1927   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
1928      transformation we don't want; it is necessary for
1929      rs6000_expand_vec_perm_const_1 but not for this use.  So we
1930      prepare for that by reversing the transformation here.  */
1931   if (BYTES_BIG_ENDIAN)
1932     emit_insn (gen (target, op0, op1, perm0, perm1));
1933   else
1934     {
1935       rtx p0 = GEN_INT (3 - INTVAL (perm1));
1936       rtx p1 = GEN_INT (3 - INTVAL (perm0));
1937       emit_insn (gen (target, op1, op0, p0, p1));
1938     }
1939   DONE;
1940 })
1941
     ;; Canonical two-input doubleword permute: operand 3 (0 or 1) selects an
     ;; element of operand 1 and operand 4 (2 or 3) an element of operand 2 in
     ;; the concatenation.  The printed xxpermdi mask is
     ;; (op3 << 1) | (op4 - 2) after the endian adjustment below, and is
     ;; stored back into operands[3] for the output template.
1942 (define_insn "vsx_xxpermdi2_<mode>_1"
1943   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
1944         (vec_select:VSX_D
1945           (vec_concat:<VS_double>
1946             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
1947             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
1948           (parallel [(match_operand 3 "const_0_to_1_operand" "")
1949                      (match_operand 4 "const_2_to_3_operand" "")])))]
1950   "VECTOR_MEM_VSX_P (<MODE>mode)"
1951 {
1952   int op3, op4, mask;
1953
1954   /* For little endian, swap operands and invert/swap selectors
1955      to get the correct xxpermdi.  The operand swap sets up the
1956      inputs as a little endian array.  The selectors are swapped
1957      because they are defined to use big endian ordering.  The
1958      selectors are inverted to get the correct doublewords for
1959      little endian ordering.  */
1960   if (BYTES_BIG_ENDIAN)
1961     {
1962       op3 = INTVAL (operands[3]);
1963       op4 = INTVAL (operands[4]);
1964     }
1965   else
1966     {
1967       op3 = 3 - INTVAL (operands[4]);
1968       op4 = 3 - INTVAL (operands[3]);
1969     }
1970
1971   mask = (op3 << 1) | (op4 - 2);
1972   operands[3] = GEN_INT (mask);
1973
1974   if (BYTES_BIG_ENDIAN)
1975     return "xxpermdi %x0,%x1,%x2,%3";
1976   else
1977     return "xxpermdi %x0,%x2,%x1,%3";
1978 }
1979   [(set_attr "type" "vecperm")])
1980
     ;; Constant permute expander for the VSX_D modes.  Operand 3 is the V2DI
     ;; selector.  All work is delegated to rs6000_expand_vec_perm_const; the
     ;; expansion FAILs when that function returns false.
1981 (define_expand "vec_perm_const<mode>"
1982   [(match_operand:VSX_D 0 "vsx_register_operand" "")
1983    (match_operand:VSX_D 1 "vsx_register_operand" "")
1984    (match_operand:VSX_D 2 "vsx_register_operand" "")
1985    (match_operand:V2DI  3 "" "")]
1986   "VECTOR_MEM_VSX_P (<MODE>mode)"
1987 {
1988   /* Give up unless the helper managed to expand the permutation.  */
1989   if (!rs6000_expand_vec_perm_const (operands))
1990     FAIL;
1991   DONE;
1992 })
1993
1994 ;; Expanders for builtins
     ;; Merge-low: normally selects elements 1 and 3 of the concatenation of
     ;; operands 1 and 2.  For little endian with VECTOR_ELT_ORDER_BIG the
     ;; operands are concatenated in the opposite order and elements 0 and 2
     ;; are selected instead.  The resulting SET is emitted directly.
1995 (define_expand "vsx_mergel_<mode>"
1996   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
1997    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
1998    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
1999   "VECTOR_MEM_VSX_P (<MODE>mode)"
2000 {
2001   rtvec v;
2002   rtx x;
2003
2004   /* Special handling for LE with -maltivec=be.  */
2005   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2006     {
2007       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2008       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2009     }
2010   else
2011     {
2012       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2013       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2014     }
2015
2016   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2017   emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
2018   DONE;
2019 })
2020
     ;; Merge-high: normally selects elements 0 and 2 of the concatenation of
     ;; operands 1 and 2.  For little endian with VECTOR_ELT_ORDER_BIG the
     ;; operands are concatenated in the opposite order and elements 1 and 3
     ;; are selected instead.  The resulting SET is emitted directly.
2021 (define_expand "vsx_mergeh_<mode>"
2022   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2023    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2024    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2025   "VECTOR_MEM_VSX_P (<MODE>mode)"
2026 {
2027   rtvec v;
2028   rtx x;
2029
2030   /* Special handling for LE with -maltivec=be.  */
2031   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2032     {
2033       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2034       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2035     }
2036   else
2037     {
2038       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2039       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2040     }
2041
2042   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2043   emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
2044   DONE;
2045 })
2046
2047 ;; V2DF/V2DI splat
     ;; Duplicates scalar operand 1 into both elements.  Register alternatives
     ;; print xxpermdi with selector 0; the memory alternatives ("Z") print
     ;; lxvdsx.
2048 (define_insn "vsx_splat_<mode>"
2049   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?<VSa>,?<VSa>,?<VSa>")
2050         (vec_duplicate:VSX_D
2051          (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,f,Z,<VSa>,<VSa>,Z")))]
2052   "VECTOR_MEM_VSX_P (<MODE>mode)"
2053   "@
2054    xxpermdi %x0,%x1,%x1,0
2055    xxpermdi %x0,%x1,%x1,0
2056    lxvdsx %x0,%y1
2057    xxpermdi %x0,%x1,%x1,0
2058    xxpermdi %x0,%x1,%x1,0
2059    lxvdsx %x0,%y1"
2060   [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
2061
2062 ;; V4SF/V4SI splat
     ;; Duplicates element operand 2 of vector operand 1.  On little endian
     ;; the index is rewritten to 3 - index before xxspltw is printed.
2063 (define_insn "vsx_xxspltw_<mode>"
2064   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2065         (vec_duplicate:VSX_W
2066          (vec_select:<VS_scalar>
2067           (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2068           (parallel
2069            [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
2070   "VECTOR_MEM_VSX_P (<MODE>mode)"
2071 {
2072   if (!BYTES_BIG_ENDIAN)
2073     operands[2] = GEN_INT (3 - INTVAL (operands[2]));
2074
2075   return "xxspltw %x0,%x1,%2";
2076 }
2077   [(set_attr "type" "vecperm")])
2078
     ;; xxspltw expressed as UNSPEC_VSX_XXSPLTW.  Unlike vsx_xxspltw_<mode>,
     ;; the word index in operand 2 is printed unchanged, with no endian
     ;; adjustment.
2079 (define_insn "vsx_xxspltw_<mode>_direct"
2080   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2081         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2082                        (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
2083                       UNSPEC_VSX_XXSPLTW))]
2084   "VECTOR_MEM_VSX_P (<MODE>mode)"
2085   "xxspltw %x0,%x1,%2"
2086   [(set_attr "type" "vecperm")])
2087
2088 ;; V2DF/V2DI splat for use by vec_splat builtin
     ;; Operand 2 is the element index.  xxpermdi selector 0 is printed when
     ;; the index equals VECTOR_ELT_ORDER_BIG ? 0 : 1, selector 3 otherwise.
2089 (define_insn "vsx_xxspltd_<mode>"
2090   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2091         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
2092                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
2093                       UNSPEC_VSX_XXSPLTD))]
2094   "VECTOR_MEM_VSX_P (<MODE>mode)"
2095 {
2096   /* Element index that maps to xxpermdi selector 0.  */
2097   HOST_WIDE_INT sel0_index = VECTOR_ELT_ORDER_BIG ? 0 : 1;
2098
2099   return (INTVAL (operands[2]) == sel0_index
2100           ? "xxpermdi %x0,%x1,%x1,0" : "xxpermdi %x0,%x1,%x1,3");
2101 }
2102   [(set_attr "type" "vecperm")])
2103
2104 ;; V4SF/V4SI interleave
     ;; Selects elements 0,4,1,5 of the concatenation of operands 1 and 2.
     ;; Big endian prints xxmrghw; little endian prints xxmrglw with the two
     ;; source registers exchanged.
2105 (define_insn "vsx_xxmrghw_<mode>"
2106   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2107         (vec_select:VSX_W
2108           (vec_concat:<VS_double>
2109             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2110             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
2111           (parallel [(const_int 0) (const_int 4)
2112                      (const_int 1) (const_int 5)])))]
2113   "VECTOR_MEM_VSX_P (<MODE>mode)"
2114 {
2115   /* Little endian uses the other merge with the inputs exchanged.  */
2116   return (BYTES_BIG_ENDIAN
2117           ? "xxmrghw %x0,%x1,%x2"
2118           : "xxmrglw %x0,%x2,%x1");
2119 }
2120   [(set_attr "type" "vecperm")])
2121
     ;; Selects elements 2,6,3,7 of the concatenation of operands 1 and 2.
     ;; Big endian prints xxmrglw; little endian prints xxmrghw with the two
     ;; source registers exchanged.
2122 (define_insn "vsx_xxmrglw_<mode>"
2123   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2124         (vec_select:VSX_W
2125           (vec_concat:<VS_double>
2126             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2127             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
2128           (parallel [(const_int 2) (const_int 6)
2129                      (const_int 3) (const_int 7)])))]
2130   "VECTOR_MEM_VSX_P (<MODE>mode)"
2131 {
2132   /* Little endian uses the other merge with the inputs exchanged.  */
2133   return (BYTES_BIG_ENDIAN
2134           ? "xxmrglw %x0,%x1,%x2"
2135           : "xxmrghw %x0,%x2,%x1");
2136 }
2137   [(set_attr "type" "vecperm")])
2138
2139 ;; Shift left double by word immediate
     ;; Operands 1 and 2 are the two source vectors and operand 3 the
     ;; immediate, printed unchanged.  The operation is kept opaque as
     ;; UNSPEC_VSX_SLDWI and is available for every VSX_L mode.
2140 (define_insn "vsx_xxsldwi_<mode>"
2141   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
2142         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
2143                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
2144                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
2145                       UNSPEC_VSX_SLDWI))]
2146   "VECTOR_MEM_VSX_P (<MODE>mode)"
2147   "xxsldwi %x0,%x1,%x2,%3"
2148   [(set_attr "type" "vecperm")])
2149
2150 \f
2151 ;; Vector reduction insns and splitters
2152
     ;; V2DF reduction: the pattern combines operand 1 with a copy of itself
     ;; whose two elements are exchanged.  It is always split ("#") into an
     ;; xxsldwi of operand 1 with itself by 2 words, written to scratch
     ;; operand 2 (or a new pseudo if that is still a SCRATCH), followed by
     ;; the <VEC_reduc_rtx>v2df3 operation on the shifted and original vectors.
2153 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
2154   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
2155         (VEC_reduc:V2DF
2156          (vec_concat:V2DF
2157           (vec_select:DF
2158            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2159            (parallel [(const_int 1)]))
2160           (vec_select:DF
2161            (match_dup 1)
2162            (parallel [(const_int 0)])))
2163          (match_dup 1)))
2164    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
2165   "VECTOR_UNIT_VSX_P (V2DFmode)"
2166   "#"
2167   ""
2168   [(const_int 0)]
2169   "
2170 {
2171   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
2172              ? gen_reg_rtx (V2DFmode)
2173              : operands[2];
2174   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
2175   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
2176   DONE;
2177 }"
2178   [(set_attr "length" "8")
2179    (set_attr "type" "veccomplex")])
2180
     ;; V4SF reduction, marked with UNSPEC_REDUC.  Always split ("#") into two
     ;; shift-and-combine steps: xxsldwi by 2 words then <VEC_reduc_rtx>v4sf3,
     ;; followed by xxsldwi by 3 words then <VEC_reduc_rtx>v4sf3 into
     ;; operand 0.  New pseudos are used for the temporaries when they can be
     ;; created; otherwise scratch operands 2 and 3 are used and the third
     ;; temporary shares operand 2.
2181 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
2182   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
2183         (VEC_reduc:V4SF
2184          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2185          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
2186    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2187    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
2188   "VECTOR_UNIT_VSX_P (V4SFmode)"
2189   "#"
2190   ""
2191   [(const_int 0)]
2192   "
2193 {
2194   rtx op0 = operands[0];
2195   rtx op1 = operands[1];
2196   rtx tmp2, tmp3, tmp4;
2197
2198   if (can_create_pseudo_p ())
2199     {
2200       tmp2 = gen_reg_rtx (V4SFmode);
2201       tmp3 = gen_reg_rtx (V4SFmode);
2202       tmp4 = gen_reg_rtx (V4SFmode);
2203     }
2204   else
2205     {
2206       tmp2 = operands[2];
2207       tmp3 = operands[3];
2208       tmp4 = tmp2;
2209     }
2210
2211   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2212   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2213   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2214   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
2215   DONE;
2216 }"
2217   [(set_attr "length" "16")
2218    (set_attr "type" "veccomplex")])
2219
2220 ;; Combiner patterns with the vector reduction patterns that knows we can get
2221 ;; to the top element of the V2DF array without doing an extract.
2222
     ;; Matches element 1 of a V2DF reduction and produces a DF result.  The
     ;; split takes the DFmode high part of operand 1 directly, extracts
     ;; element 1 into scratch operand 2 (or a new pseudo if that is still a
     ;; SCRATCH) and applies the scalar <VEC_reduc_rtx>df3 operation.
2223 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
2224   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
2225         (vec_select:DF
2226          (VEC_reduc:V2DF
2227           (vec_concat:V2DF
2228            (vec_select:DF
2229             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2230             (parallel [(const_int 1)]))
2231            (vec_select:DF
2232             (match_dup 1)
2233             (parallel [(const_int 0)])))
2234           (match_dup 1))
2235          (parallel [(const_int 1)])))
2236    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2237   "VECTOR_UNIT_VSX_P (V2DFmode)"
2238   "#"
2239   ""
2240   [(const_int 0)]
2241   "
2242 {
2243   rtx hi = gen_highpart (DFmode, operands[1]);
2244   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2245             ? gen_reg_rtx (DFmode)
2246             : operands[2];
2247
2248   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2249   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2250   DONE;
2251 }"
2252   [(set_attr "length" "8")
2253    (set_attr "type" "veccomplex")])
2254
     ;; Matches element 3 of a V4SF reduction and produces an SF result.  The
     ;; split performs the same two shift-and-combine steps as the vector
     ;; V4SF reduction, leaving the vector result in a fourth temporary
     ;; (scratch operand 4 when pseudos cannot be created), and then converts
     ;; it to the scalar result with vsx_xscvspdp_scalar2.
2255 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2256   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2257         (vec_select:SF
2258          (VEC_reduc:V4SF
2259           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2260           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2261          (parallel [(const_int 3)])))
2262    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2263    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2264    (clobber (match_scratch:V4SF 4 "=0,0"))]
2265   "VECTOR_UNIT_VSX_P (V4SFmode)"
2266   "#"
2267   ""
2268   [(const_int 0)]
2269   "
2270 {
2271   rtx op0 = operands[0];
2272   rtx op1 = operands[1];
2273   rtx tmp2, tmp3, tmp4, tmp5;
2274
2275   if (can_create_pseudo_p ())
2276     {
2277       tmp2 = gen_reg_rtx (V4SFmode);
2278       tmp3 = gen_reg_rtx (V4SFmode);
2279       tmp4 = gen_reg_rtx (V4SFmode);
2280       tmp5 = gen_reg_rtx (V4SFmode);
2281     }
2282   else
2283     {
2284       tmp2 = operands[2];
2285       tmp3 = operands[3];
2286       tmp4 = tmp2;
2287       tmp5 = operands[4];
2288     }
2289
2290   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2291   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2292   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2293   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2294   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2295   DONE;
2296 }"
2297   [(set_attr "length" "20")
2298    (set_attr "type" "veccomplex")])
2299
2300 \f
2301 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
     ;; Matches a short constant loaded into base register operand 0 followed
     ;; by a VSX load from (operand 0 + operand 3), and outputs the pair as a
     ;; single 8-byte "li" + indexed-load sequence.  This form has the base
     ;; register as the first term of the address sum.
2302 (define_peephole
2303   [(set (match_operand:P 0 "base_reg_operand" "")
2304         (match_operand:P 1 "short_cint_operand" ""))
2305    (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2306         (mem:VSX_M2 (plus:P (match_dup 0)
2307                             (match_operand:P 3 "int_reg_operand" ""))))]
2308   "TARGET_VSX && TARGET_P8_FUSION"
2309   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2310   [(set_attr "length" "8")
2311    (set_attr "type" "vecload")])
2312
     ;; Same fusion as the preceding peephole, for the address form in which
     ;; the base register (operand 0) is the second term of the sum.  The
     ;; output template is identical.
2313 (define_peephole
2314   [(set (match_operand:P 0 "base_reg_operand" "")
2315         (match_operand:P 1 "short_cint_operand" ""))
2316    (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2317         (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
2318                             (match_dup 0))))]
2319   "TARGET_VSX && TARGET_P8_FUSION"
2320   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2321   [(set_attr "length" "8")
2322    (set_attr "type" "vecload")])