gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2017 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Code iterator over the comparison codes eq, lt, gt and unordered, so that
     ;; a single pattern can be written once for all four comparisons.
  22 (define_code_iterator CMP_TEST [eq lt gt unordered])
  23
  24 ;; Iterator for both scalar (DF) and vector (V4SF, V2DF) floating point
     ;; types supported by VSX
  25 (define_mode_iterator VSX_B [DF V4SF V2DF])
  26
  27 ;; Iterator for the 2 vector types with 64-bit elements (V2DF and V2DI)
  28 (define_mode_iterator VSX_D [V2DF V2DI])
  29
  30 ;; Mode iterator to handle swapping words on little endian for the 128-bit
  31 ;; types that go in a single vector register.  KF, TF and TI are only
     ;; enabled when the condition attached to each of them holds; V1TI is
     ;; always enabled.
  32 (define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
  33                                   (TF   "FLOAT128_VECTOR_P (TFmode)")
  34                                   (TI   "TARGET_VSX_TIMODE")
  35                                   V1TI])
  36
  37 ;; Iterator for the 2 vector types with 32-bit elements (V4SF and V4SI)
  38 (define_mode_iterator VSX_W [V4SF V4SI])
  39
  40 ;; Iterator for the double precision types: vector V2DF and scalar DF
  41 (define_mode_iterator VSX_DF [V2DF DF])
  42
  43 ;; Iterator for vector floating point types supported by VSX (V4SF and
     ;; V2DF only; no scalar modes)
  44 (define_mode_iterator VSX_F [V4SF V2DF])
  45
  46 ;; Iterator for logical types supported by VSX.  KF and TF are only enabled
     ;; when FLOAT128_VECTOR_P holds for the mode; every other mode listed,
     ;; including TI, is unconditional.
  47 (define_mode_iterator VSX_L [V16QI
  48                              V8HI
  49                              V4SI
  50                              V2DI
  51                              V4SF
  52                              V2DF
  53                              V1TI
  54                              TI
  55                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  56                              (TF        "FLOAT128_VECTOR_P (TFmode)")])
  57
  58 ;; Iterator for memory moves.  KF and TF require FLOAT128_VECTOR_P for the
     ;; mode and TI requires TARGET_VSX_TIMODE; the vector modes are
     ;; unconditional.
  59 (define_mode_iterator VSX_M [V16QI
  60                              V8HI
  61                              V4SI
  62                              V2DI
  63                              V4SF
  64                              V2DF
  65                              V1TI
  66                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  67                              (TF        "FLOAT128_VECTOR_P (TFmode)")
  68                              (TI        "TARGET_VSX_TIMODE")])
  69
  70 ;; Map into the appropriate load/store name based on the type.  The vectors
     ;; with 32-bit or narrower elements use "vw4", DF uses "d", and every other
     ;; mode listed uses "vd2".
  71 (define_mode_attr VSm  [(V16QI "vw4")
  72                         (V8HI  "vw4")
  73                         (V4SI  "vw4")
  74                         (V4SF  "vw4")
  75                         (V2DF  "vd2")
  76                         (V2DI  "vd2")
  77                         (DF    "d")
  78                         (TF    "vd2")
  79                         (KF    "vd2")
  80                         (V1TI  "vd2")
  81                         (TI    "vd2")])
  82
  83 ;; Map into the appropriate suffix based on the type: "sp" for SF and for
     ;; the vectors with 32-bit or narrower elements, "dp" for everything else
     ;; listed.
  84 (define_mode_attr VSs   [(V16QI "sp")
  85                          (V8HI  "sp")
  86                          (V4SI  "sp")
  87                          (V4SF  "sp")
  88                          (V2DF  "dp")
  89                          (V2DI  "dp")
  90                          (DF    "dp")
  91                          (SF    "sp")
  92                          (TF    "dp")
  93                          (KF    "dp")
  94                          (V1TI  "dp")
  95                          (TI    "dp")])
  96
  97 ;; Map the register class used.  This is the preferred class for the mode;
     ;; the VSa attribute gives the wider class the mode may also occupy.
  98 (define_mode_attr VSr   [(V16QI "v")
  99                          (V8HI  "v")
 100                          (V4SI  "v")
 101                          (V4SF  "wf")
 102                          (V2DI  "wd")
 103                          (V2DF  "wd")
 104                          (DI    "wi")
 105                          (DF    "ws")
 106                          (SF    "ww")
 107                          (TF    "wp")
 108                          (KF    "wq")
 109                          (V1TI  "v")
 110                          (TI    "wt")])
 111
 112 ;; Map the register class used for float<->int conversions (floating point side)
 113 ;; VSr2 is the preferred register class, VSr3 is any register class that will
 114 ;; hold the data
 115 (define_mode_attr VSr2  [(V2DF  "wd")
 116                          (V4SF  "wf")
 117                          (DF    "ws")
 118                          (SF    "ww")
 119                          (DI    "wi")
 120                          (KF    "wq")
 121                          (TF    "wp")])
 122
     ;; VSr3: any register class that will hold the data.  It differs from VSr2
     ;; only for the vector modes V2DF and V4SF, which both map to "wa" here.
 123 (define_mode_attr VSr3  [(V2DF  "wa")
 124                          (V4SF  "wa")
 125                          (DF    "ws")
 126                          (SF    "ww")
 127                          (DI    "wi")
 128                          (KF    "wq")
 129                          (TF    "wp")])
 130
 131 ;; Map the register class for sp<->dp float conversions, destination.
     ;; The key is the mode being converted from (see VSr5 for the source side).
     ;; NOTE(review): the key/side relationship is inferred from the paired
     ;; VSr5 table -- confirm against the patterns that use <VSr4>.
 132 (define_mode_attr VSr4  [(SF    "ws")
 133                          (DF    "f")
 134                          (V2DF  "wd")
 135                          (V4SF  "v")])
 136
 137 ;; Map the register class for sp<->dp float conversions, source.  The scalar
     ;; entries match VSr4; the V2DF and V4SF entries are exchanged relative to
     ;; VSr4.
 138 (define_mode_attr VSr5  [(SF    "ws")
 139                          (DF    "f")
 140                          (V2DF  "v")
 141                          (V4SF  "wd")])
 142
 143 ;; The VSX register class that a type can occupy, even if it is not the
 144 ;; preferred register class (VSr is the preferred register class that will get
 145 ;; allocated first).  Every vector mode listed maps to "wa"; the non-vector
     ;; modes (DI, DF, SF, TI, TF, KF) keep a mode-specific class.
 146 (define_mode_attr VSa   [(V16QI "wa")
 147                          (V8HI  "wa")
 148                          (V4SI  "wa")
 149                          (V4SF  "wa")
 150                          (V2DI  "wa")
 151                          (V2DF  "wa")
 152                          (DI    "wi")
 153                          (DF    "ws")
 154                          (SF    "ww")
 155                          (V1TI  "wa")
 156                          (TI    "wt")
 157                          (TF    "wp")
 158                          (KF    "wq")])
 159
 160 ;; Same size integer type for floating point data.  VSi gives the mode name
     ;; in lower case.
 161 (define_mode_attr VSi [(V4SF  "v4si")
 162                        (V2DF  "v2di")
 163                        (DF    "di")])
 164
     ;; VSI is the same mapping as VSi with the mode name in upper case.
 165 (define_mode_attr VSI [(V4SF  "V4SI")
 166                        (V2DF  "V2DI")
 167                        (DF    "DI")])
 168
 169 ;; Word size for same size conversion: "w" for V4SF, "d" for V2DF and DF
 170 (define_mode_attr VSc [(V4SF "w")
 171                        (V2DF "d")
 172                        (DF   "d")])
 173
 174 ;; Map into either s or v, depending on whether this is a scalar or vector
 175 ;; operation.  Note that KF maps to "v" here, while DF is the only mode that
     ;; maps to "s".
 176 (define_mode_attr VSv   [(V16QI "v")
 177                          (V8HI  "v")
 178                          (V4SI  "v")
 179                          (V4SF  "v")
 180                          (V2DI  "v")
 181                          (V2DF  "v")
 182                          (V1TI  "v")
 183                          (DF    "s")
 184                          (KF    "v")])
 185
 186 ;; Appropriate type for add ops (and other simple FP ops).  VStype_simple
     ;; supplies the "type" value for each mode.
 187 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 188                                  (V4SF "vecfloat")
 189                                  (DF   "fp")])
 190
     ;; Companion to VStype_simple: the fp_addsub_d / fp_addsub_s value for the
     ;; same ops; V2DF and DF share the "_d" form, V4SF uses "_s".
 191 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 192                                    (V4SF "fp_addsub_s")
 193                                    (DF   "fp_addsub_d")])
 194
 195 ;; Appropriate type for multiply ops.  VStype_mul supplies the "type" value
     ;; for each mode.
 196 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 197                                  (V4SF "vecfloat")
 198                                  (DF   "dmul")])
 199
     ;; Companion to VStype_mul: the fp_mul_d / fp_mul_s value for multiply ops;
     ;; V2DF and DF share the "_d" form, V4SF uses "_s".
 200 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 201                                  (V4SF "fp_mul_s")
 202                                  (DF   "fp_mul_d")])
 203
 204 ;; Appropriate type for divide ops.  VStype_div supplies the "type" value
     ;; for each mode.
 205 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 206                                  (V4SF "vecfdiv")
 207                                  (DF   "ddiv")])
 208
     ;; Companion to VStype_div: the fp_div_d / fp_div_s value for divide ops;
     ;; V2DF and DF share the "_d" form, V4SF uses "_s".
 209 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 210                                  (V4SF "fp_div_s")
 211                                  (DF   "fp_div_d")])
 212
 213 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 214 ;; the scalar sqrt: V2DF and DF both map to "dsqrt", V4SF to "ssqrt".
 215 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 216                                  (V4SF "ssqrt")
 217                                  (DF   "dsqrt")])
 218
     ;; Companion to VStype_sqrt: the fp_sqrt_d / fp_sqrt_s value for sqrt ops;
     ;; V2DF and DF share the "_d" form, V4SF uses "_s".
 219 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 220                                  (V4SF "fp_sqrt_s")
 221                                  (DF   "fp_sqrt_d")])
 222
 223 ;; Iterator and modes for sp<->dp conversions
 224 ;; Because scalar SF values are represented internally as double, use the
 225 ;; V4SF type to represent this rather than SF.
 226 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 227
     ;; Result mode of the conversion, keyed by the source mode.
 228 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 229                                (V4SF    "V2DF")
 230                                (V2DF    "V4SF")])
 231
     ;; Instruction mnemonic for the conversion, keyed by the source mode.
 232 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 233                                 (V4SF   "xvcvspdp")
 234                                 (V2DF   "xvcvdpsp")])
 235
     ;; Insn "type" value for the conversion, keyed by the source mode.
 236 (define_mode_attr VS_spdp_type [(DF     "fp")
 237                                 (V4SF   "vecdouble")
 238                                 (V2DF   "vecdouble")])
 239
 240 ;; Map the scalar mode for a vector type, i.e. the mode of one element
     ;; (upper case mode name).
 241 (define_mode_attr VS_scalar [(V1TI      "TI")
 242                              (V2DF      "DF")
 243                              (V2DI      "DI")
 244                              (V4SF      "SF")
 245                              (V4SI      "SI")
 246                              (V8HI      "HI")
 247                              (V16QI     "QI")])
 248
 249 ;; Map to a double-sized vector mode: same element type, twice as many
     ;; elements.
 250 (define_mode_attr VS_double [(V4SI      "V8SI")
 251                              (V4SF      "V8SF")
 252                              (V2DI      "V4DI")
 253                              (V2DF      "V4DF")
 254                              (V1TI      "V2TI")])
 255
 256 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 257 ;; to/from gprs ("wk" for V2DF, "wj" for V2DI)
 258 (define_mode_attr VS_64dm [(V2DF        "wk")
 259                            (V2DI        "wj")])
 260
 261 ;; Map register class for 64-bit element in 128-bit vector for normal register
 262 ;; to register moves ("ws" for V2DF, "wi" for V2DI)
 263 (define_mode_attr VS_64reg [(V2DF       "ws")
 264                             (V2DI       "wi")])
 265
 266 ;; Iterators for loading constants with xxspltib.  VSINT_84 covers the
     ;; 4- and 8-byte integer modes (vector and scalar); VSINT_842 covers the
     ;; vector modes with 2-, 4- and 8-byte elements.
 267 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 268 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 269
 270 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
 271 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
 272 ;; done on ISA 2.07 and not just ISA 3.0.
 273 (define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
 274 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
 275
     ;; Element width letter for each extractable vector mode: "b" (byte),
     ;; "h" (halfword) or "w" (word).
 276 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
 277                                      (V8HI "h")
 278                                      (V4SI "w")])
 279
 280 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
 281 ;; insert to validate the operand number.  The predicate's range matches the
     ;; number of elements in the vector mode (16, 8 or 4).
 282 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
 283                                          (V8HI  "const_0_to_7_operand")
 284                                          (V4SI  "const_0_to_3_operand")])
 285
 286 ;; Mode attribute to give the constraint for vector extract and insert
 287 ;; operations: "v" for V16QI and V8HI, "wa" for V4SI.
 288 (define_mode_attr VSX_EX [(V16QI "v")
 289                           (V8HI  "v")
 290                           (V4SI  "wa")])
 291
 292 ;; Mode iterator for binary floating types other than double to
 293 ;; optimize convert to that floating point type from an extract
 294 ;; of an integer type.  SF is unconditional; IF, KF and TF are each enabled
     ;; only when the condition attached to them holds.
 295 (define_mode_iterator VSX_EXTRACT_FL [SF
 296                                       (IF "FLOAT128_2REG_P (IFmode)")
 297                                       (KF "TARGET_FLOAT128_HW")
 298                                       (TF "FLOAT128_2REG_P (TFmode)
 299                                            || (FLOAT128_IEEE_P (TFmode)
 300                                                && TARGET_FLOAT128_HW)")])
 301
 302 ;; Mode iterator for binary floating types that have a direct conversion
 303 ;; from 64-bit integer to floating point.  SF and DF are unconditional; KF
     ;; requires TARGET_FLOAT128_HW, and TF additionally requires
     ;; FLOAT128_IEEE_P (TFmode).
 304 (define_mode_iterator FL_CONV [SF
 305                                DF
 306                                (KF "TARGET_FLOAT128_HW")
 307                                (TF "TARGET_FLOAT128_HW
 308                                     && FLOAT128_IEEE_P (TFmode)")])
 309
 310 ;; Iterator for the 2 short vector types (V16QI and V8HI) to do a splat from
     ;; an integer
 311 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 312
 313 ;; Mode attribute to give the count for the splat instruction to splat
 314 ;; the value in the 64-bit integer slot ("7" for V16QI, "3" for V8HI)
 315 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
 316
 317 ;; Mode attribute to give the suffix for the splat instruction ("b" for
     ;; V16QI, "h" for V8HI)
 318 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
 319
 320 ;; Constants for creating unspecs.  Each name below is added as an
     ;; enumerator of the "unspec" C enum.
 321 (define_c_enum "unspec"
 322   [UNSPEC_VSX_CONCAT
 323    UNSPEC_VSX_CVDPSXWS
 324    UNSPEC_VSX_CVDPUXWS
 325    UNSPEC_VSX_CVSPDP
 326    UNSPEC_VSX_CVSPDPN
 327    UNSPEC_VSX_CVDPSPN
 328    UNSPEC_VSX_CVSXWDP
 329    UNSPEC_VSX_CVUXWDP
 330    UNSPEC_VSX_CVSXDSP
 331    UNSPEC_VSX_CVUXDSP
 332    UNSPEC_VSX_CVSPSXDS
 333    UNSPEC_VSX_CVSPUXDS
 334    UNSPEC_VSX_TDIV
 335    UNSPEC_VSX_TSQRT
 336    UNSPEC_VSX_SET
 337    UNSPEC_VSX_ROUND_I
 338    UNSPEC_VSX_ROUND_IC
 339    UNSPEC_VSX_SLDWI
 340    UNSPEC_VSX_XXSPLTW
 341    UNSPEC_VSX_XXSPLTD
 342    UNSPEC_VSX_DIVSD
 343    UNSPEC_VSX_DIVUD
 344    UNSPEC_VSX_MULSD
 345    UNSPEC_VSX_XVCVSXDDP
 346    UNSPEC_VSX_XVCVUXDDP
 347    UNSPEC_VSX_XVCVDPSXDS
 348    UNSPEC_VSX_XVCVDPUXDS
 349    UNSPEC_VSX_SIGN_EXTEND
 350    UNSPEC_VSX_VSLO
 351    UNSPEC_VSX_EXTRACT
 352    UNSPEC_VSX_SXEXPDP
 353    UNSPEC_VSX_SXSIGDP
 354    UNSPEC_VSX_SIEXPDP
 355    UNSPEC_VSX_SCMPEXPDP
 356    UNSPEC_VSX_STSTDC
 357    UNSPEC_VSX_VXEXP
 358    UNSPEC_VSX_VXSIG
 359    UNSPEC_VSX_VIEXP
 360    UNSPEC_VSX_VTSTDC
 361    UNSPEC_VSX_VEC_INIT
 362    UNSPEC_LXVL
 363    UNSPEC_STXVL
 364    UNSPEC_VCLZLSBB
 365    UNSPEC_VCTZLSBB
 366    UNSPEC_VEXTUBLX
 367    UNSPEC_VEXTUHLX
 368    UNSPEC_VEXTUWLX
 369    UNSPEC_VEXTUBRX
 370    UNSPEC_VEXTUHRX
 371    UNSPEC_VEXTUWRX
 372    UNSPEC_VCMPNEB
 373    UNSPEC_VCMPNEZB
 374    UNSPEC_VCMPNEH
 375    UNSPEC_VCMPNEZH
 376    UNSPEC_VCMPNEW
 377    UNSPEC_VCMPNEZW
 378    UNSPEC_XXEXTRACTUW
 379    UNSPEC_XXINSERTW
 380   ])
 381
 382 ;; VSX moves
 383
 384 ;; The patterns for LE permuted loads and stores come before the general
 385 ;; VSX moves so they match first.
     ;;
     ;; Little-endian load of a vector with two 64-bit elements (VSX_D) when
     ;; TARGET_P9_VECTOR is off.  The output template is "#", so the insn is
     ;; always split.  The split produces two element-swapping vec_selects:
     ;; memory -> operand 2, then operand 2 -> operand 0.  Operand 2 is a new
     ;; pseudo when pseudos can still be created; otherwise the destination
     ;; register itself is used as the intermediate.
 386 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 387   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
 388         (match_operand:VSX_D 1 "memory_operand" "Z"))]
 389   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 390   "#"
 391   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 392   [(set (match_dup 2)
 393         (vec_select:<MODE>
 394           (match_dup 1)
 395           (parallel [(const_int 1) (const_int 0)])))
 396    (set (match_dup 0)
 397         (vec_select:<MODE>
 398           (match_dup 2)
 399           (parallel [(const_int 1) (const_int 0)])))]
 400   "
 401 {
 402   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 403                                        : operands[0];
 404 }
 405   "
 406   [(set_attr "type" "vecload")
 407    (set_attr "length" "8")])
 408
     ;; Little-endian load of a vector with four 32-bit elements (VSX_W) when
     ;; TARGET_P9_VECTOR is off.  Always split ("#") into two vec_selects that
     ;; each exchange the two halves of the vector (element order 2 3 0 1):
     ;; memory -> operand 2, then operand 2 -> operand 0.  Operand 2 is a new
     ;; pseudo when one can be created, otherwise the destination register.
 409 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 410   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 411         (match_operand:VSX_W 1 "memory_operand" "Z"))]
 412   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 413   "#"
 414   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 415   [(set (match_dup 2)
 416         (vec_select:<MODE>
 417           (match_dup 1)
 418           (parallel [(const_int 2) (const_int 3)
 419                      (const_int 0) (const_int 1)])))
 420    (set (match_dup 0)
 421         (vec_select:<MODE>
 422           (match_dup 2)
 423           (parallel [(const_int 2) (const_int 3)
 424                      (const_int 0) (const_int 1)])))]
 425   "
 426 {
 427   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 428                                        : operands[0];
 429 }
 430   "
 431   [(set_attr "type" "vecload")
 432    (set_attr "length" "8")])
 433
     ;; Little-endian load of a V8HI vector when TARGET_P9_VECTOR is off.
     ;; Always split ("#") into two vec_selects that each exchange the two
     ;; halves of the vector (element order 4..7 0..3): memory -> operand 2,
     ;; then operand 2 -> operand 0.  Operand 2 is a new pseudo when one can be
     ;; created, otherwise the destination register.
 434 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 435   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 436         (match_operand:V8HI 1 "memory_operand" "Z"))]
 437   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 438   "#"
 439   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 440   [(set (match_dup 2)
 441         (vec_select:V8HI
 442           (match_dup 1)
 443           (parallel [(const_int 4) (const_int 5)
 444                      (const_int 6) (const_int 7)
 445                      (const_int 0) (const_int 1)
 446                      (const_int 2) (const_int 3)])))
 447    (set (match_dup 0)
 448         (vec_select:V8HI
 449           (match_dup 2)
 450           (parallel [(const_int 4) (const_int 5)
 451                      (const_int 6) (const_int 7)
 452                      (const_int 0) (const_int 1)
 453                      (const_int 2) (const_int 3)])))]
 454   "
 455 {
 456   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 457                                        : operands[0];
 458 }
 459   "
 460   [(set_attr "type" "vecload")
 461    (set_attr "length" "8")])
 462
     ;; Little-endian load of a V16QI vector when TARGET_P9_VECTOR is off.
     ;; Always split ("#") into two vec_selects that each exchange the two
     ;; halves of the vector (element order 8..15 0..7): memory -> operand 2,
     ;; then operand 2 -> operand 0.  Operand 2 is a new pseudo when one can be
     ;; created, otherwise the destination register.
 463 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 464   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 465         (match_operand:V16QI 1 "memory_operand" "Z"))]
 466   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 467   "#"
 468   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 469   [(set (match_dup 2)
 470         (vec_select:V16QI
 471           (match_dup 1)
 472           (parallel [(const_int 8) (const_int 9)
 473                      (const_int 10) (const_int 11)
 474                      (const_int 12) (const_int 13)
 475                      (const_int 14) (const_int 15)
 476                      (const_int 0) (const_int 1)
 477                      (const_int 2) (const_int 3)
 478                      (const_int 4) (const_int 5)
 479                      (const_int 6) (const_int 7)])))
 480    (set (match_dup 0)
 481         (vec_select:V16QI
 482           (match_dup 2)
 483           (parallel [(const_int 8) (const_int 9)
 484                      (const_int 10) (const_int 11)
 485                      (const_int 12) (const_int 13)
 486                      (const_int 14) (const_int 15)
 487                      (const_int 0) (const_int 1)
 488                      (const_int 2) (const_int 3)
 489                      (const_int 4) (const_int 5)
 490                      (const_int 6) (const_int 7)])))]
 491   "
 492 {
 493   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 494                                        : operands[0];
 495 }
 496   "
 497   [(set_attr "type" "vecload")
 498    (set_attr "length" "8")])
 499
     ;; Little-endian store of a VSX_D vector when TARGET_P9_VECTOR is off.
     ;; The output template is "#", so the insn is always split; the two
     ;; define_splits that follow give the pre-reload and post-reload
     ;; sequences.  Operand 1 uses the "+" (read/write) modifier because the
     ;; post-reload split permutes the source register in place and then
     ;; restores it.  NOTE(review): length 12 presumably corresponds to the
     ;; three-insn post-reload sequence at 4 bytes each -- confirm.
 500 (define_insn "*vsx_le_perm_store_<mode>"
 501   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
 502         (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
 503   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 504   "#"
 505   [(set_attr "type" "vecstore")
 506    (set_attr "length" "12")])
 507
     ;; Pre-reload split of the little-endian VSX_D store: swap the two
     ;; elements of operand 1 into operand 2, then store operand 2 to memory
     ;; with a second swap.  Operand 2 is a new pseudo when one can be
     ;; created; otherwise the source register is reused as the intermediate.
 508 (define_split
 509   [(set (match_operand:VSX_D 0 "memory_operand" "")
 510         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 511   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 512   [(set (match_dup 2)
 513         (vec_select:<MODE>
 514           (match_dup 1)
 515           (parallel [(const_int 1) (const_int 0)])))
 516    (set (match_dup 0)
 517         (vec_select:<MODE>
 518           (match_dup 2)
 519           (parallel [(const_int 1) (const_int 0)])))]
 520 {
 521   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 522                                        : operands[1];
 523 })
 524
 525 ;; The post-reload split requires that we re-permute the source
 526 ;; register in case it is still live.  Three steps: swap operand 1 in place,
     ;; store it to memory with a second swap, then swap operand 1 again so it
     ;; holds its original contents.
 527 (define_split
 528   [(set (match_operand:VSX_D 0 "memory_operand" "")
 529         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 530   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 531   [(set (match_dup 1)
 532         (vec_select:<MODE>
 533           (match_dup 1)
 534           (parallel [(const_int 1) (const_int 0)])))
 535    (set (match_dup 0)
 536         (vec_select:<MODE>
 537           (match_dup 1)
 538           (parallel [(const_int 1) (const_int 0)])))
 539    (set (match_dup 1)
 540         (vec_select:<MODE>
 541           (match_dup 1)
 542           (parallel [(const_int 1) (const_int 0)])))]
 543   "")
 544
     ;; Little-endian store of a VSX_W vector when TARGET_P9_VECTOR is off.
     ;; Always split ("#"); the two define_splits that follow give the
     ;; pre-reload and post-reload sequences.  Operand 1 uses the "+"
     ;; (read/write) modifier because the post-reload split permutes the source
     ;; register in place and then restores it.
 545 (define_insn "*vsx_le_perm_store_<mode>"
 546   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 547         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
 548   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 549   "#"
 550   [(set_attr "type" "vecstore")
 551    (set_attr "length" "12")])
 552
     ;; Pre-reload split of the little-endian VSX_W store: exchange the two
     ;; halves of operand 1 (element order 2 3 0 1) into operand 2, then store
     ;; operand 2 to memory with the same exchange.  Operand 2 is a new pseudo
     ;; when one can be created; otherwise the source register is reused.
 553 (define_split
 554   [(set (match_operand:VSX_W 0 "memory_operand" "")
 555         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 556   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 557   [(set (match_dup 2)
 558         (vec_select:<MODE>
 559           (match_dup 1)
 560           (parallel [(const_int 2) (const_int 3)
 561                      (const_int 0) (const_int 1)])))
 562    (set (match_dup 0)
 563         (vec_select:<MODE>
 564           (match_dup 2)
 565           (parallel [(const_int 2) (const_int 3)
 566                      (const_int 0) (const_int 1)])))]
 567 {
 568   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 569                                        : operands[1];
 570 })
 571
 572 ;; The post-reload split requires that we re-permute the source
 573 ;; register in case it is still live.  Three steps: permute operand 1 in
     ;; place, store it to memory with a second permute, then permute operand 1
     ;; again so it holds its original contents.
 574 (define_split
 575   [(set (match_operand:VSX_W 0 "memory_operand" "")
 576         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 577   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 578   [(set (match_dup 1)
 579         (vec_select:<MODE>
 580           (match_dup 1)
 581           (parallel [(const_int 2) (const_int 3)
 582                      (const_int 0) (const_int 1)])))
 583    (set (match_dup 0)
 584         (vec_select:<MODE>
 585           (match_dup 1)
 586           (parallel [(const_int 2) (const_int 3)
 587                      (const_int 0) (const_int 1)])))
 588    (set (match_dup 1)
 589         (vec_select:<MODE>
 590           (match_dup 1)
 591           (parallel [(const_int 2) (const_int 3)
 592                      (const_int 0) (const_int 1)])))]
 593   "")
 594
     ;; Little-endian store of a V8HI vector when TARGET_P9_VECTOR is off.
     ;; Always split ("#"); the two define_splits that follow give the
     ;; pre-reload and post-reload sequences.  Operand 1 uses the "+"
     ;; (read/write) modifier because the post-reload split permutes the source
     ;; register in place and then restores it.
 595 (define_insn "*vsx_le_perm_store_v8hi"
 596   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 597         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 598   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 599   "#"
 600   [(set_attr "type" "vecstore")
 601    (set_attr "length" "12")])
 602
     ;; Pre-reload split of the little-endian V8HI store: exchange the two
     ;; halves of operand 1 (element order 4..7 0..3) into operand 2, then
     ;; store operand 2 to memory with the same exchange.  Operand 2 is a new
     ;; pseudo when one can be created; otherwise the source register is reused.
 603 (define_split
 604   [(set (match_operand:V8HI 0 "memory_operand" "")
 605         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 606   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 607   [(set (match_dup 2)
 608         (vec_select:V8HI
 609           (match_dup 1)
 610           (parallel [(const_int 4) (const_int 5)
 611                      (const_int 6) (const_int 7)
 612                      (const_int 0) (const_int 1)
 613                      (const_int 2) (const_int 3)])))
 614    (set (match_dup 0)
 615         (vec_select:V8HI
 616           (match_dup 2)
 617           (parallel [(const_int 4) (const_int 5)
 618                      (const_int 6) (const_int 7)
 619                      (const_int 0) (const_int 1)
 620                      (const_int 2) (const_int 3)])))]
 621 {
 622   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 623                                        : operands[1];
 624 })
 625
 626 ;; The post-reload split requires that we re-permute the source
 627 ;; register in case it is still live.  Three steps: permute operand 1 in
     ;; place, store it to memory with a second permute, then permute operand 1
     ;; again so it holds its original contents.
 628 (define_split
 629   [(set (match_operand:V8HI 0 "memory_operand" "")
 630         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 631   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 632   [(set (match_dup 1)
 633         (vec_select:V8HI
 634           (match_dup 1)
 635           (parallel [(const_int 4) (const_int 5)
 636                      (const_int 6) (const_int 7)
 637                      (const_int 0) (const_int 1)
 638                      (const_int 2) (const_int 3)])))
 639    (set (match_dup 0)
 640         (vec_select:V8HI
 641           (match_dup 1)
 642           (parallel [(const_int 4) (const_int 5)
 643                      (const_int 6) (const_int 7)
 644                      (const_int 0) (const_int 1)
 645                      (const_int 2) (const_int 3)])))
 646    (set (match_dup 1)
 647         (vec_select:V8HI
 648           (match_dup 1)
 649           (parallel [(const_int 4) (const_int 5)
 650                      (const_int 6) (const_int 7)
 651                      (const_int 0) (const_int 1)
 652                      (const_int 2) (const_int 3)])))]
 653   "")
 654
     ;; Little-endian store of a V16QI vector when TARGET_P9_VECTOR is off.
     ;; Always split ("#"); the two define_splits that follow give the
     ;; pre-reload and post-reload sequences.  Operand 1 uses the "+"
     ;; (read/write) modifier because the post-reload split permutes the source
     ;; register in place and then restores it.
 655 (define_insn "*vsx_le_perm_store_v16qi"
 656   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
 657         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 658   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 659   "#"
 660   [(set_attr "type" "vecstore")
 661    (set_attr "length" "12")])
 662
     ;; Pre-reload split of the little-endian V16QI store: exchange the two
     ;; halves of operand 1 (element order 8..15 0..7) into operand 2, then
     ;; store operand 2 to memory with the same exchange.  Operand 2 is a new
     ;; pseudo when one can be created; otherwise the source register is reused.
 663 (define_split
 664   [(set (match_operand:V16QI 0 "memory_operand" "")
 665         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 666   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 667   [(set (match_dup 2)
 668         (vec_select:V16QI
 669           (match_dup 1)
 670           (parallel [(const_int 8) (const_int 9)
 671                      (const_int 10) (const_int 11)
 672                      (const_int 12) (const_int 13)
 673                      (const_int 14) (const_int 15)
 674                      (const_int 0) (const_int 1)
 675                      (const_int 2) (const_int 3)
 676                      (const_int 4) (const_int 5)
 677                      (const_int 6) (const_int 7)])))
 678    (set (match_dup 0)
 679         (vec_select:V16QI
 680           (match_dup 2)
 681           (parallel [(const_int 8) (const_int 9)
 682                      (const_int 10) (const_int 11)
 683                      (const_int 12) (const_int 13)
 684                      (const_int 14) (const_int 15)
 685                      (const_int 0) (const_int 1)
 686                      (const_int 2) (const_int 3)
 687                      (const_int 4) (const_int 5)
 688                      (const_int 6) (const_int 7)])))]
 689 {
 690   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 691                                        : operands[1];
 692 })
 693
 694 ;; The post-reload split requires that we re-permute the source
 695 ;; register in case it is still live.  Three steps: permute operand 1 in
     ;; place, store it to memory with a second permute, then permute operand 1
     ;; again so it holds its original contents.
 696 (define_split
 697   [(set (match_operand:V16QI 0 "memory_operand" "")
 698         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 699   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 700   [(set (match_dup 1)
 701         (vec_select:V16QI
 702           (match_dup 1)
 703           (parallel [(const_int 8) (const_int 9)
 704                      (const_int 10) (const_int 11)
 705                      (const_int 12) (const_int 13)
 706                      (const_int 14) (const_int 15)
 707                      (const_int 0) (const_int 1)
 708                      (const_int 2) (const_int 3)
 709                      (const_int 4) (const_int 5)
 710                      (const_int 6) (const_int 7)])))
 711    (set (match_dup 0)
 712         (vec_select:V16QI
 713           (match_dup 1)
 714           (parallel [(const_int 8) (const_int 9)
 715                      (const_int 10) (const_int 11)
 716                      (const_int 12) (const_int 13)
 717                      (const_int 14) (const_int 15)
 718                      (const_int 0) (const_int 1)
 719                      (const_int 2) (const_int 3)
 720                      (const_int 4) (const_int 5)
 721                      (const_int 6) (const_int 7)])))
 722    (set (match_dup 1)
 723         (vec_select:V16QI
 724           (match_dup 1)
 725           (parallel [(const_int 8) (const_int 9)
 726                      (const_int 10) (const_int 11)
 727                      (const_int 12) (const_int 13)
 728                      (const_int 14) (const_int 15)
 729                      (const_int 0) (const_int 1)
 730                      (const_int 2) (const_int 3)
 731                      (const_int 4) (const_int 5)
 732                      (const_int 6) (const_int 7)])))]
 733   "")
 734
 735 ;; Little endian word swapping for 128-bit types that are either scalars or the
 736 ;; special V1TI container class, for which it is not appropriate to use
 737 ;; vec_select.  The swap is written as a rotate by 64 bits instead.  The
     ;; three alternatives are register to register (xxpermdi), memory to
     ;; register (lxvd2x) and register to memory (stxvd2x).
 738 (define_insn "*vsx_le_permute_<mode>"
 739   [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
 740         (rotate:VSX_LE_128
 741          (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
 742          (const_int 64)))]
 743   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 744   "@
 745    xxpermdi %x0,%x1,%x1,2
 746    lxvd2x %x0,%y1
 747    stxvd2x %x1,%y0"
 748   [(set_attr "length" "4")
 749    (set_attr "type" "vecperm,vecload,vecstore")])
 750
     ;; Match two nested rotates by 64 bits of a 128-bit register value.  The
     ;; split replaces the pair with a plain move from operand 1 to operand 0.
     ;; After reload, when both operands are the same register, no move is
     ;; emitted at all (only a NOTE_INSN_DELETED).  The first alternative ties
     ;; operand 1 to operand 0 and has length 0; the second is a register copy
     ;; done with xxlor.
     ;; NOTE(review): unlike the neighbouring LE permute patterns, this insn
     ;; condition does not test !TARGET_P9_VECTOR -- confirm that is intended.
 751 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
 752   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
 753         (rotate:VSX_LE_128
 754          (rotate:VSX_LE_128
 755           (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
 756           (const_int 64))
 757          (const_int 64)))]
 758   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 759   "@
 760    #
 761    xxlor %x0,%x1"
 762   ""
 763   [(set (match_dup 0) (match_dup 1))]
 764 {
 765   if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
 766     {
 767       emit_note (NOTE_INSN_DELETED);
 768       DONE;
 769     }
 770 }
 771   [(set_attr "length" "0,4")
 772    (set_attr "type" "veclogical")])
 773
     ;; Little-endian load of a 128-bit VSX_LE_128 value when TARGET_P9_VECTOR
     ;; is off.  Always split ("#") into two rotates by 64 bits: memory ->
     ;; operand 2, then operand 2 -> operand 0.  Operand 2 is a new pseudo when
     ;; one can be created; otherwise the destination register is used as the
     ;; intermediate.
 774 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 775   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
 776         (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
 777   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 778   "#"
 779   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 780   [(set (match_dup 2)
 781         (rotate:VSX_LE_128 (match_dup 1)
 782                            (const_int 64)))
 783    (set (match_dup 0)
 784         (rotate:VSX_LE_128 (match_dup 2)
 785                            (const_int 64)))]
 786   "
 787 {
 788   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 789                                        : operands[0];
 790 }
 791   "
 792   [(set_attr "type" "vecload")
 793    (set_attr "length" "8")])
 794
     ;; Little endian store of a VSX_LE_128 register to memory when
     ;; TARGET_P9_VECTOR is off.  The output template is "#", so the insn has to
     ;; be split; the two unnamed define_splits further down match this same
     ;; (set mem reg) shape before and after reload.
     ;; NOTE(review): operand 1 is marked read-write ("+"), presumably because
     ;; the post-reload split rotates the source register in place -- confirm.
 795 (define_insn "*vsx_le_perm_store_<mode>"
 796   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
 797         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
 798   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 799   "#"
 800   [(set_attr "type" "vecstore")
 801    (set_attr "length" "12")])
 802
     ;; Pre-reload split of a little endian VSX_LE_128 store: rotate the source
     ;; register into operands[2], then store operands[2] with a second rotate,
     ;; leaving the source register untouched.
     ;; NOTE(review): the temporary is created from operands[0], which this
     ;; pattern matches as the memory destination, and the fallback when no
     ;; pseudo can be created is that same memory operand -- confirm this is
     ;; intended rather than operands[1].
 803 (define_split
 804   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 805         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 806   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
 807   [(set (match_dup 2)
 808         (rotate:VSX_LE_128 (match_dup 1)
 809                            (const_int 64)))
 810    (set (match_dup 0)
 811         (rotate:VSX_LE_128 (match_dup 2)
 812                            (const_int 64)))]
 813 {
 814   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 815                                        : operands[0];
 816 })
 817
 818 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
 819 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
 820 ;; floating point are handled by the more generic swap elimination pass.
     ;; Matches a rotate by 64 of operand 1 into operand 0 followed by a rotate
     ;; by 64 of operand 0 into operand 2, and replaces the pair with a direct
     ;; copy of operand 1 into operand 2.  This is only done when operand 0 is
     ;; operand 2 itself or peep2_reg_dead_p (2, ...) reports it dead.
 821 (define_peephole2
 822   [(set (match_operand:TI 0 "vsx_register_operand" "")
 823         (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
 824                    (const_int 64)))
 825    (set (match_operand:TI 2 "vsx_register_operand" "")
 826         (rotate:TI (match_dup 0)
 827                    (const_int 64)))]
 828   "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
 829    && (rtx_equal_p (operands[0], operands[2])
 830        || peep2_reg_dead_p (2, operands[0]))"
 831    [(set (match_dup 2) (match_dup 1))])
 832
 833 ;; The post-reload split requires that we re-permute the source
 834 ;; register in case it is still live.
     ;; Three steps: rotate operand 1 in place, store it to operand 0 through a
     ;; second rotate, then rotate operand 1 again to restore its value.
 835 (define_split
 836   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 837         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 838   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
 839   [(set (match_dup 1)
 840         (rotate:VSX_LE_128 (match_dup 1)
 841                            (const_int 64)))
 842    (set (match_dup 0)
 843         (rotate:VSX_LE_128 (match_dup 1)
 844                            (const_int 64)))
 845    (set (match_dup 1)
 846         (rotate:VSX_LE_128 (match_dup 1)
 847                            (const_int 64)))]
 848   "")
 849
 850 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 851 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
     ;; This is the vec_duplicate form for V16QI.  The signed 8-bit immediate
     ;; in operand 1 is masked to its low 8 bits (operands[2]) before printing.
 852 (define_insn "xxspltib_v16qi"
 853   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 854         (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
 855   "TARGET_P9_VECTOR"
 856 {
 857   operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
 858   return "xxspltib %x0,%2";
 859 }
 860   [(set_attr "type" "vecperm")])
 861
     ;; Load a VSINT_842 constant accepted by xxspltib_constant_nosplit with a
     ;; single XXSPLTIB.  xxspltib_constant_p supplies the byte value and must
     ;; report exactly one instruction; any other outcome hits gcc_unreachable.
     ;; The value is masked to 8 bits before being printed.
 862 (define_insn "xxspltib_<mode>_nosplit"
 863   [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
 864         (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
 865   "TARGET_P9_VECTOR"
 866 {
 867   rtx op1 = operands[1];
 868   int value = 256;
 869   int num_insns = -1;
 870
 871   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 872       || num_insns != 1)
 873     gcc_unreachable ();
 874
 875   operands[2] = GEN_INT (value & 0xff);
 876   return "xxspltib %x0,%2";
 877 }
 878   [(set_attr "type" "vecperm")])
 879
     ;; Load a VSINT_842 constant accepted by xxspltib_constant_split using two
     ;; instructions.  The split first emits xxspltib_v16qi into a V16QI
     ;; temporary (a new pseudo, or the V16QI lowpart of the destination when
     ;; pseudos are no longer available).  It then widens the bytes to the
     ;; element size: sign-extend patterns for V2DI and V4SI, vupkhsb for V8HI.
     ;; xxspltib_constant_p must report exactly two instructions.
 880 (define_insn_and_split "*xxspltib_<mode>_split"
 881   [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
 882         (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
 883   "TARGET_P9_VECTOR"
 884   "#"
 885   "&& 1"
 886   [(const_int 0)]
 887 {
 888   int value = 256;
 889   int num_insns = -1;
 890   rtx op0 = operands[0];
 891   rtx op1 = operands[1];
 892   rtx tmp = ((can_create_pseudo_p ())
 893              ? gen_reg_rtx (V16QImode)
 894              : gen_lowpart (V16QImode, op0));
 895
 896   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 897       || num_insns != 2)
 898     gcc_unreachable ();
 899
 900   emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
 901
 902   if (<MODE>mode == V2DImode)
 903     emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
 904
 905   else if (<MODE>mode == V4SImode)
 906     emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
 907
 908   else if (<MODE>mode == V8HImode)
 909     emit_insn (gen_altivec_vupkhsb  (op0, tmp));
 910
 911   else
 912     gcc_unreachable ();
 913
 914   DONE;
 915 }
 916   [(set_attr "type" "vecperm")
 917    (set_attr "length" "8")])
 918
 919
 920 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
 921 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
 922 ;; all 1's, since the machine does not have to wait for the previous
 923 ;; instruction using the register being set (such as a store waiting on a slow
 924 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
 925
     ;; Move pattern for VSX_M modes when TARGET_POWERPC64 is set.  At least one
     ;; operand must be a register; the assembly text for every alternative
     ;; comes from rs6000_output_move_128bit.  The table below labels the 18
     ;; alternatives in the same row/column layout as the constraint, "type"
     ;; and "length" lists.
     ;; NOTE(review): assuming wQ and wZ are memory constraints, the sixth and
     ;; seventh alternatives are "?wQ <- r" (a store) and "?&r <- wQ" (a load),
     ;; and the last two are "wZ <- v" (a store) and "v <- wZ" (a load).  The
     ;; LQ/STQ and LVX/STVX labels below, and the "load, store" type entries
     ;; for the wQ pair, appear to be in the opposite order -- confirm.
 926 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 927 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
 928 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
 929 (define_insn "*vsx_mov<mode>_64bit"
 930   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
 931                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
 932                 ?&r,       ??r,       ??Y,       ??r,       wo,        v,
 933                 ?<VSa>,    *r,        v,         ??r,       wZ,        v")
 934
 935         (match_operand:VSX_M 1 "input_operand"
 936                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
 937                 wQ,        Y,         r,         r,         wE,        jwM,
 938                 ?jwM,      jwM,       W,         W,         v,         wZ"))]
 939
 940   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
 941    && (register_operand (operands[0], <MODE>mode)
 942        || register_operand (operands[1], <MODE>mode))"
 943 {
 944   return rs6000_output_move_128bit (operands);
 945 }
 946   [(set_attr "type"
 947                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
 948                 store,     load,      store,     *,         vecsimple, vecsimple,
 949                 vecsimple, *,         *,         *,         vecstore,  vecload")
 950
 951    (set_attr "length"
 952                "4,         4,         4,         8,         4,         8,
 953                 8,         8,         8,         8,         4,         4,
 954                 4,         8,         20,        20,        4,         4")])
 955
     ;; Move pattern for VSX_M modes when TARGET_POWERPC64 is not set.  At least
     ;; one operand must be a register; the assembly text for every alternative
     ;; comes from rs6000_output_move_128bit.  The table below labels the 14
     ;; alternatives in the same layout as the constraint and attribute lists.
     ;; NOTE(review): assuming wZ is a memory constraint, the last two
     ;; alternatives are "wZ <- v" (typed vecstore) and "v <- wZ" (typed
     ;; vecload), so the LVX/STVX labels appear to be swapped -- confirm.
 956 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
 957 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
 958 ;;              LVX (VMX)  STVX (VMX)
 959 (define_insn "*vsx_mov<mode>_32bit"
 960   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
 961                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       ??r,
 962                 wo,        v,         ?<VSa>,    *r,        v,         ??r,
 963                 wZ,        v")
 964
 965         (match_operand:VSX_M 1 "input_operand"
 966                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
 967                 wE,        jwM,       ?jwM,      jwM,       W,         W,
 968                 v,         wZ"))]
 969
 970   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
 971    && (register_operand (operands[0], <MODE>mode)
 972        || register_operand (operands[1], <MODE>mode))"
 973 {
 974   return rs6000_output_move_128bit (operands);
 975 }
 976   [(set_attr "type"
 977                "vecstore,  vecload,   vecsimple, load,      store,    *,
 978                 vecsimple, vecsimple, vecsimple, *,         *,        *,
 979                 vecstore,  vecload")
 980
 981    (set_attr "length"
 982                "4,         4,         4,         16,        16,        16,
 983                 4,         4,         4,         16,        20,        32,
 984                 4,         4")])
 985
 986 ;; Explicit load/store expanders for the builtin functions
     ;; Load expander: on little endian without TARGET_P9_VECTOR the move is
     ;; emitted through rs6000_emit_le_vsx_move; otherwise the plain
     ;; (set reg mem) pattern is used as written.
 987 (define_expand "vsx_load_<mode>"
 988   [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
 989         (match_operand:VSX_M 1 "memory_operand" ""))]
 990   "VECTOR_MEM_VSX_P (<MODE>mode)"
 991 {
 992   /* Expand to swaps if needed, prior to swap optimization.  */
 993   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
 994     {
 995       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
 996       DONE;
 997     }
 998 })
 999
     ;; Store expander for the builtin functions: on little endian without
     ;; TARGET_P9_VECTOR the move is emitted through rs6000_emit_le_vsx_move;
     ;; otherwise the plain (set mem reg) pattern is used as written.
1000 (define_expand "vsx_store_<mode>"
1001   [(set (match_operand:VSX_M 0 "memory_operand" "")
1002         (match_operand:VSX_M 1 "vsx_register_operand" ""))]
1003   "VECTOR_MEM_VSX_P (<MODE>mode)"
1004 {
1005   /* Expand to swaps if needed, prior to swap optimization.  */
1006   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1007     {
1008       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1009       DONE;
1010     }
1011 })
1012
1013 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1014 ;; when you really want their element-reversing behavior.
     ;; V2DI load: the result is the memory vector with its two elements
     ;; selected in the order 1, 0.  Little endian only; emits lxvd2x.
1015 (define_insn "vsx_ld_elemrev_v2di"
1016   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1017         (vec_select:V2DI
1018           (match_operand:V2DI 1 "memory_operand" "Z")
1019           (parallel [(const_int 1) (const_int 0)])))]
1020   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1021   "lxvd2x %x0,%y1"
1022   [(set_attr "type" "vecload")])
1023
     ;; Element-reversing V2DF load (elements selected in the order 1, 0).
     ;; Little endian only; emits lxvd2x.
1024 (define_insn "vsx_ld_elemrev_v2df"
1025   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1026         (vec_select:V2DF
1027           (match_operand:V2DF 1 "memory_operand" "Z")
1028           (parallel [(const_int 1) (const_int 0)])))]
1029   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1030   "lxvd2x %x0,%y1"
1031   [(set_attr "type" "vecload")])
1032
     ;; Element-reversing V4SI load (elements selected in the order 3..0).
     ;; Little endian only; emits lxvw4x.
1033 (define_insn "vsx_ld_elemrev_v4si"
1034   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1035         (vec_select:V4SI
1036           (match_operand:V4SI 1 "memory_operand" "Z")
1037           (parallel [(const_int 3) (const_int 2)
1038                      (const_int 1) (const_int 0)])))]
1039   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1040   "lxvw4x %x0,%y1"
1041   [(set_attr "type" "vecload")])
1042
     ;; Element-reversing V4SF load (elements selected in the order 3..0).
     ;; Little endian only; emits lxvw4x.
1043 (define_insn "vsx_ld_elemrev_v4sf"
1044   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1045         (vec_select:V4SF
1046           (match_operand:V4SF 1 "memory_operand" "Z")
1047           (parallel [(const_int 3) (const_int 2)
1048                      (const_int 1) (const_int 0)])))]
1049   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1050   "lxvw4x %x0,%y1"
1051   [(set_attr "type" "vecload")])
1052
     ;; Element-reversing V8HI load (elements selected in the order 7..0).
     ;; Little endian only and additionally requires TARGET_P9_VECTOR; emits
     ;; lxvh8x.
1053 (define_insn "vsx_ld_elemrev_v8hi"
1054   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1055         (vec_select:V8HI
1056           (match_operand:V8HI 1 "memory_operand" "Z")
1057           (parallel [(const_int 7) (const_int 6)
1058                      (const_int 5) (const_int 4)
1059                      (const_int 3) (const_int 2)
1060                      (const_int 1) (const_int 0)])))]
1061   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1062   "lxvh8x %x0,%y1"
1063   [(set_attr "type" "vecload")])
1064
     ;; Element-reversing V16QI load (elements selected in the order 15..0).
     ;; Little endian only and additionally requires TARGET_P9_VECTOR; emits
     ;; lxvb16x.
1065 (define_insn "vsx_ld_elemrev_v16qi"
1066   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1067         (vec_select:V16QI
1068           (match_operand:V16QI 1 "memory_operand" "Z")
1069           (parallel [(const_int 15) (const_int 14)
1070                      (const_int 13) (const_int 12)
1071                      (const_int 11) (const_int 10)
1072                      (const_int  9) (const_int  8)
1073                      (const_int  7) (const_int  6)
1074                      (const_int  5) (const_int  4)
1075                      (const_int  3) (const_int  2)
1076                      (const_int  1) (const_int  0)])))]
1077   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1078   "lxvb16x %x0,%y1"
1079   [(set_attr "type" "vecload")])
1080
     ;; Element-reversing V2DF store (elements selected in the order 1, 0).
     ;; Little endian only; emits stxvd2x.
1081 (define_insn "vsx_st_elemrev_v2df"
1082   [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1083         (vec_select:V2DF
1084           (match_operand:V2DF 1 "vsx_register_operand" "wa")
1085           (parallel [(const_int 1) (const_int 0)])))]
1086   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1087   "stxvd2x %x1,%y0"
1088   [(set_attr "type" "vecstore")])
1089
     ;; Element-reversing V2DI store (elements selected in the order 1, 0).
     ;; Little endian only; emits stxvd2x.
1090 (define_insn "vsx_st_elemrev_v2di"
1091   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1092         (vec_select:V2DI
1093           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1094           (parallel [(const_int 1) (const_int 0)])))]
1095   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1096   "stxvd2x %x1,%y0"
1097   [(set_attr "type" "vecstore")])
1098
     ;; Element-reversing V4SF store (elements selected in the order 3..0).
     ;; Little endian only; emits stxvw4x.
1099 (define_insn "vsx_st_elemrev_v4sf"
1100   [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1101         (vec_select:V4SF
1102           (match_operand:V4SF 1 "vsx_register_operand" "wa")
1103           (parallel [(const_int 3) (const_int 2)
1104                      (const_int 1) (const_int 0)])))]
1105   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1106   "stxvw4x %x1,%y0"
1107   [(set_attr "type" "vecstore")])
1108
     ;; Element-reversing V4SI store (elements selected in the order 3..0).
     ;; Little endian only; emits stxvw4x.
1109 (define_insn "vsx_st_elemrev_v4si"
1110   [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1111         (vec_select:V4SI
1112           (match_operand:V4SI 1 "vsx_register_operand" "wa")
1113           (parallel [(const_int 3) (const_int 2)
1114                      (const_int 1) (const_int 0)])))]
1115   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1116   "stxvw4x %x1,%y0"
1117   [(set_attr "type" "vecstore")])
1118
     ;; Element-reversing V8HI store (elements selected in the order 7..0).
     ;; Little endian only and additionally requires TARGET_P9_VECTOR; emits
     ;; stxvh8x.
1119 (define_insn "vsx_st_elemrev_v8hi"
1120   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1121         (vec_select:V8HI
1122           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1123           (parallel [(const_int 7) (const_int 6)
1124                      (const_int 5) (const_int 4)
1125                      (const_int 3) (const_int 2)
1126                      (const_int 1) (const_int 0)])))]
1127   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1128   "stxvh8x %x1,%y0"
1129   [(set_attr "type" "vecstore")])
1130
     ;; Element-reversing V16QI store (elements selected in the order 15..0).
     ;; Little endian only and additionally requires TARGET_P9_VECTOR; emits
     ;; stxvb16x.
1131 (define_insn "vsx_st_elemrev_v16qi"
1132   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1133         (vec_select:V16QI
1134           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1135           (parallel [(const_int 15) (const_int 14)
1136                      (const_int 13) (const_int 12)
1137                      (const_int 11) (const_int 10)
1138                      (const_int  9) (const_int  8)
1139                      (const_int  7) (const_int  6)
1140                      (const_int  5) (const_int  4)
1141                      (const_int  3) (const_int  2)
1142                      (const_int  1) (const_int  0)])))]
1143   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1144   "stxvb16x %x1,%y0"
1145   [(set_attr "type" "vecstore")])
1146
1147 \f
1148 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1149 ;; instructions are now combined with the insn for the traditional floating
1150 ;; point unit.
     ;; Vector add for the VSX_F modes (V4SF, V2DF); emits xvadd<VSs>.  The
     ;; second alternative allows any <VSa> register and is slightly
     ;; disparaged ("?").
1151 (define_insn "*vsx_add<mode>3"
1152   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1153         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1154                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1155   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1156   "xvadd<VSs> %x0,%x1,%x2"
1157   [(set_attr "type" "<VStype_simple>")
1158    (set_attr "fp_type" "<VSfptype_simple>")])
1159
     ;; Vector subtract (operand 1 minus operand 2) for the VSX_F modes; emits
     ;; xvsub<VSs>.
1160 (define_insn "*vsx_sub<mode>3"
1161   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1162         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1163                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1164   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1165   "xvsub<VSs> %x0,%x1,%x2"
1166   [(set_attr "type" "<VStype_simple>")
1167    (set_attr "fp_type" "<VSfptype_simple>")])
1168
     ;; Vector multiply for the VSX_F modes; emits xvmul<VSs>.  Uses the
     ;; <VSfptype_mul> fp_type rather than the simple one.
1169 (define_insn "*vsx_mul<mode>3"
1170   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1171         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1172                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1173   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1174   "xvmul<VSs> %x0,%x1,%x2"
1175   [(set_attr "type" "<VStype_simple>")
1176    (set_attr "fp_type" "<VSfptype_mul>")])
1177
1178 ; Emulate vector with scalar for vec_mul in V2DImode
     ; The insn only emits "#" and is split before reload (the split condition
     ; excludes reload_completed and reload_in_progress).  The split extracts
     ; elements 0 and 1 of both inputs into DImode pseudos, multiplies each
     ; pair with muldi3, and rebuilds the V2DI result with vsx_concat_v2di.
1179 (define_insn_and_split "vsx_mul_v2di"
1180   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1181         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1182                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1183                      UNSPEC_VSX_MULSD))]
1184   "VECTOR_MEM_VSX_P (V2DImode)"
1185   "#"
1186   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1187   [(const_int 0)]
1188   "
1189 {
1190   rtx op0 = operands[0];
1191   rtx op1 = operands[1];
1192   rtx op2 = operands[2];
1193   rtx op3 = gen_reg_rtx (DImode);
1194   rtx op4 = gen_reg_rtx (DImode);
1195   rtx op5 = gen_reg_rtx (DImode);
1196   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1197   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1198   emit_insn (gen_muldi3 (op5, op3, op4));
1199   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1200   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1201   emit_insn (gen_muldi3 (op3, op3, op4));
1202   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1203   DONE;
1204 }"
1205   [(set_attr "type" "mul")])
1206
     ;; Vector divide (operand 1 divided by operand 2) for the VSX_F modes;
     ;; emits xvdiv<VSs> and uses the dedicated divide type attributes.
1207 (define_insn "*vsx_div<mode>3"
1208   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1209         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1210                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1211   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1212   "xvdiv<VSs> %x0,%x1,%x2"
1213   [(set_attr "type" "<VStype_div>")
1214    (set_attr "fp_type" "<VSfptype_div>")])
1215
1216 ; Emulate vector with scalar for vec_div in V2DImode
     ; The insn only emits "#" and is split before reload.  The split extracts
     ; elements 0 and 1 of both inputs into DImode pseudos, divides each pair
     ; with divdi3, and rebuilds the V2DI result with vsx_concat_v2di.
1217 (define_insn_and_split "vsx_div_v2di"
1218   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1219         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1220                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1221                      UNSPEC_VSX_DIVSD))]
1222   "VECTOR_MEM_VSX_P (V2DImode)"
1223   "#"
1224   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1225   [(const_int 0)]
1226   "
1227 {
1228   rtx op0 = operands[0];
1229   rtx op1 = operands[1];
1230   rtx op2 = operands[2];
1231   rtx op3 = gen_reg_rtx (DImode);
1232   rtx op4 = gen_reg_rtx (DImode);
1233   rtx op5 = gen_reg_rtx (DImode);
1234   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1235   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1236   emit_insn (gen_divdi3 (op5, op3, op4));
1237   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1238   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1239   emit_insn (gen_divdi3 (op3, op3, op4));
1240   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1241   DONE;
1242 }"
1243   [(set_attr "type" "div")])
1244
     ; V2DI division emulated with scalar udivdi3 (UNSPEC_VSX_DIVUD).  The insn
     ; only emits "#" and is split before reload.  The split extracts elements
     ; 0 and 1 of both inputs into DImode pseudos, divides each pair with
     ; udivdi3, and rebuilds the V2DI result with vsx_concat_v2di.
1245 (define_insn_and_split "vsx_udiv_v2di"
1246   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1247         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1248                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1249                      UNSPEC_VSX_DIVUD))]
1250   "VECTOR_MEM_VSX_P (V2DImode)"
1251   "#"
1252   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1253   [(const_int 0)]
1254   "
1255 {
1256   rtx op0 = operands[0];
1257   rtx op1 = operands[1];
1258   rtx op2 = operands[2];
1259   rtx op3 = gen_reg_rtx (DImode);
1260   rtx op4 = gen_reg_rtx (DImode);
1261   rtx op5 = gen_reg_rtx (DImode);
1262   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1263   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1264   emit_insn (gen_udivdi3 (op5, op3, op4));
1265   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1266   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1267   emit_insn (gen_udivdi3 (op3, op3, op4));
1268   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1269   DONE;
1270 }"
1271   [(set_attr "type" "div")])
1272
1273 ;; *tdiv* instruction returning the FG flag
     ;; operands[3] is a new CCFP pseudo that receives the UNSPEC_VSX_TDIV test
     ;; of operands 1 and 2; operand 0 (SImode) is then set to (gt CC 0).
1274 (define_expand "vsx_tdiv<mode>3_fg"
1275   [(set (match_dup 3)
1276         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1277                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1278                      UNSPEC_VSX_TDIV))
1279    (set (match_operand:SI 0 "gpc_reg_operand" "")
1280         (gt:SI (match_dup 3)
1281                (const_int 0)))]
1282   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1283 {
1284   operands[3] = gen_reg_rtx (CCFPmode);
1285 })
1286
1287 ;; *tdiv* instruction returning the FE flag
     ;; operands[3] is a new CCFP pseudo that receives the UNSPEC_VSX_TDIV test
     ;; of operands 1 and 2; operand 0 (SImode) is then set to (eq CC 0).
1288 (define_expand "vsx_tdiv<mode>3_fe"
1289   [(set (match_dup 3)
1290         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1291                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1292                      UNSPEC_VSX_TDIV))
1293    (set (match_operand:SI 0 "gpc_reg_operand" "")
1294         (eq:SI (match_dup 3)
1295                (const_int 0)))]
1296   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1297 {
1298   operands[3] = gen_reg_rtx (CCFPmode);
1299 })
1300
     ;; The insn matched by the first set of the two tdiv expanders: sets a
     ;; CCFP condition register from the UNSPEC_VSX_TDIV test of two VSX_B
     ;; operands (DF, V4SF or V2DF); emits x<VSv>tdiv<VSs>.
1301 (define_insn "*vsx_tdiv<mode>3_internal"
1302   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1303         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1304                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1305                    UNSPEC_VSX_TDIV))]
1306   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1307   "x<VSv>tdiv<VSs> %0,%x1,%x2"
1308   [(set_attr "type" "<VStype_simple>")
1309    (set_attr "fp_type" "<VSfptype_simple>")])
1310
     ;; UNSPEC_FRES operation on a VSX_F vector; emits xvre<VSs>.
1311 (define_insn "vsx_fre<mode>2"
1312   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1313         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1314                       UNSPEC_FRES))]
1315   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1316   "xvre<VSs> %x0,%x1"
1317   [(set_attr "type" "<VStype_simple>")
1318    (set_attr "fp_type" "<VSfptype_simple>")])
1319
     ;; Vector negate for the VSX_F modes; emits xvneg<VSs>.
1320 (define_insn "*vsx_neg<mode>2"
1321   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1322         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1323   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1324   "xvneg<VSs> %x0,%x1"
1325   [(set_attr "type" "<VStype_simple>")
1326    (set_attr "fp_type" "<VSfptype_simple>")])
1327
     ;; Vector absolute value for the VSX_F modes; emits xvabs<VSs>.
1328 (define_insn "*vsx_abs<mode>2"
1329   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1330         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1331   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1332   "xvabs<VSs> %x0,%x1"
1333   [(set_attr "type" "<VStype_simple>")
1334    (set_attr "fp_type" "<VSfptype_simple>")])
1335
     ;; Negated absolute value, (neg (abs x)), for the VSX_F modes; emits
     ;; xvnabs<VSs>.
1336 (define_insn "vsx_nabs<mode>2"
1337   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1338         (neg:VSX_F
1339          (abs:VSX_F
1340           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1341   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1342   "xvnabs<VSs> %x0,%x1"
1343   [(set_attr "type" "<VStype_simple>")
1344    (set_attr "fp_type" "<VSfptype_simple>")])
1345
     ;; Element-wise maximum (smax) for the VSX_F modes; emits xvmax<VSs>.
1346 (define_insn "vsx_smax<mode>3"
1347   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1348         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1349                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1350   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1351   "xvmax<VSs> %x0,%x1,%x2"
1352   [(set_attr "type" "<VStype_simple>")
1353    (set_attr "fp_type" "<VSfptype_simple>")])
1354
     ;; Element-wise minimum (smin) for the VSX_F modes; emits xvmin<VSs>.
1355 (define_insn "*vsx_smin<mode>3"
1356   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1357         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1358                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1359   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1360   "xvmin<VSs> %x0,%x1,%x2"
1361   [(set_attr "type" "<VStype_simple>")
1362    (set_attr "fp_type" "<VSfptype_simple>")])
1363
     ;; Vector square root for the VSX_F modes; emits xvsqrt<VSs> and uses the
     ;; dedicated sqrt type attributes.
1364 (define_insn "*vsx_sqrt<mode>2"
1365   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1366         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1367   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1368   "xvsqrt<VSs> %x0,%x1"
1369   [(set_attr "type" "<VStype_sqrt>")
1370    (set_attr "fp_type" "<VSfptype_sqrt>")])
1371
     ;; UNSPEC_RSQRT operation on a VSX_F vector; emits xvrsqrte<VSs>.
1372 (define_insn "*vsx_rsqrte<mode>2"
1373   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1374         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1375                       UNSPEC_RSQRT))]
1376   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1377   "xvrsqrte<VSs> %x0,%x1"
1378   [(set_attr "type" "<VStype_simple>")
1379    (set_attr "fp_type" "<VSfptype_simple>")])
1380
1381 ;; *tsqrt* returning the fg flag
     ;; operands[2] is a new CCFP pseudo that receives the UNSPEC_VSX_TSQRT
     ;; test of operand 1; operand 0 (SImode) is then set to (gt CC 0).
1382 (define_expand "vsx_tsqrt<mode>2_fg"
1383   [(set (match_dup 2)
1384         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1385                      UNSPEC_VSX_TSQRT))
1386    (set (match_operand:SI 0 "gpc_reg_operand" "")
1387         (gt:SI (match_dup 2)
1388                (const_int 0)))]
1389   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1390 {
1391   operands[2] = gen_reg_rtx (CCFPmode);
1392 })
1393
1394 ;; *tsqrt* returning the fe flag
     ;; operands[2] is a new CCFP pseudo that receives the UNSPEC_VSX_TSQRT
     ;; test of operand 1; operand 0 (SImode) is then set to (eq CC 0).
1395 (define_expand "vsx_tsqrt<mode>2_fe"
1396   [(set (match_dup 2)
1397         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1398                      UNSPEC_VSX_TSQRT))
1399    (set (match_operand:SI 0 "gpc_reg_operand" "")
1400         (eq:SI (match_dup 2)
1401                (const_int 0)))]
1402   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1403 {
1404   operands[2] = gen_reg_rtx (CCFPmode);
1405 })
1406
     ;; The insn matched by the first set of the two tsqrt expanders: sets a
     ;; CCFP condition register from the UNSPEC_VSX_TSQRT test of a VSX_B
     ;; operand (DF, V4SF or V2DF); emits x<VSv>tsqrt<VSs>.
1407 (define_insn "*vsx_tsqrt<mode>2_internal"
1408   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1409         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1410                      UNSPEC_VSX_TSQRT))]
1411   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1412   "x<VSv>tsqrt<VSs> %0,%x1"
1413   [(set_attr "type" "<VStype_simple>")
1414    (set_attr "fp_type" "<VSfptype_simple>")])
1415
1416 ;; Fused vector multiply/add instructions. Support the classical Altivec
1417 ;; versions of fma, which allows the target to be a separate register from the
1418 ;; 3 inputs.  Under VSX, the target must be either the addend or the first
1419 ;; multiply.
1420
     ;; V4SF fma.  In the four VSX alternatives the destination is tied either
     ;; to operand 3 (xvmaddasp, printing operands 1 and 2) or to operand 2
     ;; (xvmaddmsp, printing operands 1 and 3).  The fifth alternative uses "v"
     ;; registers for all four operands and emits the Altivec vmaddfp.
1421 (define_insn "*vsx_fmav4sf4"
1422   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1423         (fma:V4SF
1424           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1425           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1426           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1427   "VECTOR_UNIT_VSX_P (V4SFmode)"
1428   "@
1429    xvmaddasp %x0,%x1,%x2
1430    xvmaddmsp %x0,%x1,%x3
1431    xvmaddasp %x0,%x1,%x2
1432    xvmaddmsp %x0,%x1,%x3
1433    vmaddfp %0,%1,%2,%3"
1434   [(set_attr "type" "vecfloat")])
1435
     ;; V2DF fma.  The destination is tied either to operand 3 (xvmaddadp,
     ;; printing operands 1 and 2) or to operand 2 (xvmaddmdp, printing
     ;; operands 1 and 3).  There is no Altivec alternative for this mode.
1436 (define_insn "*vsx_fmav2df4"
1437   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1438         (fma:V2DF
1439           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1440           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1441           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1442   "VECTOR_UNIT_VSX_P (V2DFmode)"
1443   "@
1444    xvmaddadp %x0,%x1,%x2
1445    xvmaddmdp %x0,%x1,%x3
1446    xvmaddadp %x0,%x1,%x2
1447    xvmaddmdp %x0,%x1,%x3"
1448   [(set_attr "type" "vecdouble")])
1449
     ;; Fused multiply-subtract, (fma a b (neg c)), for the VSX_F modes.  The
     ;; destination is tied either to operand 3 (xvmsuba<VSs>) or to operand 2
     ;; (xvmsubm<VSs>).
1450 (define_insn "*vsx_fms<mode>4"
1451   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1452         (fma:VSX_F
1453           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1454           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1455           (neg:VSX_F
1456             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1457   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1458   "@
1459    xvmsuba<VSs> %x0,%x1,%x2
1460    xvmsubm<VSs> %x0,%x1,%x3
1461    xvmsuba<VSs> %x0,%x1,%x2
1462    xvmsubm<VSs> %x0,%x1,%x3"
1463   [(set_attr "type" "<VStype_mul>")])
1464
     ;; Negated fused multiply-add, (neg (fma a b c)), for the VSX_F modes.
     ;; The destination is tied either to operand 3 (xvnmadda<VSs>) or to
     ;; operand 2 (xvnmaddm<VSs>).
     ;; NOTE(review): unlike the other fma patterns in this group, operand 1
     ;; here has no "%" (commutative) modifier -- confirm whether that is
     ;; intentional.
1465 (define_insn "*vsx_nfma<mode>4"
1466   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1467         (neg:VSX_F
1468          (fma:VSX_F
1469           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1470           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1471           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1472   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1473   "@
1474    xvnmadda<VSs> %x0,%x1,%x2
1475    xvnmaddm<VSs> %x0,%x1,%x3
1476    xvnmadda<VSs> %x0,%x1,%x2
1477    xvnmaddm<VSs> %x0,%x1,%x3"
1478   [(set_attr "type" "<VStype_mul>")
1479    (set_attr "fp_type" "<VSfptype_mul>")])
1480
     ;; V4SF negated fused multiply-subtract, (neg (fma a b (neg c))).  The
     ;; four VSX alternatives tie the destination to operand 3 (xvnmsubasp) or
     ;; operand 2 (xvnmsubmsp); the fifth uses "v" registers for all four
     ;; operands and emits the Altivec vnmsubfp.
1481 (define_insn "*vsx_nfmsv4sf4"
1482   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1483         (neg:V4SF
1484          (fma:V4SF
1485            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1486            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1487            (neg:V4SF
1488              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1489   "VECTOR_UNIT_VSX_P (V4SFmode)"
1490   "@
1491    xvnmsubasp %x0,%x1,%x2
1492    xvnmsubmsp %x0,%x1,%x3
1493    xvnmsubasp %x0,%x1,%x2
1494    xvnmsubmsp %x0,%x1,%x3
1495    vnmsubfp %0,%1,%2,%3"
1496   [(set_attr "type" "vecfloat")])
1497
     ;; V2DF negated fused multiply-subtract, (neg (fma a b (neg c))).  The
     ;; destination is tied either to operand 3 (xvnmsubadp) or to operand 2
     ;; (xvnmsubmdp).
1498 (define_insn "*vsx_nfmsv2df4"
1499   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1500         (neg:V2DF
1501          (fma:V2DF
1502            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1503            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1504            (neg:V2DF
1505              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1506   "VECTOR_UNIT_VSX_P (V2DFmode)"
1507   "@
1508    xvnmsubadp %x0,%x1,%x2
1509    xvnmsubmdp %x0,%x1,%x3
1510    xvnmsubadp %x0,%x1,%x2
1511    xvnmsubmdp %x0,%x1,%x3"
1512   [(set_attr "type" "vecdouble")])
1513
1514 ;; Vector conditional expressions (no scalar version for these instructions)
1515 (define_insn "vsx_eq<mode>"
1516   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1517         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1518                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1519   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1520   "xvcmpeq<VSs> %x0,%x1,%x2"
1521   [(set_attr "type" "<VStype_simple>")
1522    (set_attr "fp_type" "<VSfptype_simple>")])
1523
1524 (define_insn "vsx_gt<mode>"
1525   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1526         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1527                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1528   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1529   "xvcmpgt<VSs> %x0,%x1,%x2"
1530   [(set_attr "type" "<VStype_simple>")
1531    (set_attr "fp_type" "<VSfptype_simple>")])
1532
1533 (define_insn "*vsx_ge<mode>"
1534   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1535         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1536                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1537   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1538   "xvcmpge<VSs> %x0,%x1,%x2"
1539   [(set_attr "type" "<VStype_simple>")
1540    (set_attr "fp_type" "<VSfptype_simple>")])
1541
1542 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1543 ;; indicate a combined status
1544 (define_insn "*vsx_eq_<mode>_p"
1545   [(set (reg:CC CR6_REGNO)
1546         (unspec:CC
1547          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1548                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1549          UNSPEC_PREDICATE))
1550    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1551         (eq:VSX_F (match_dup 1)
1552                   (match_dup 2)))]
1553   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1554   "xvcmpeq<VSs>. %x0,%x1,%x2"
1555   [(set_attr "type" "<VStype_simple>")])
1556
1557 (define_insn "*vsx_gt_<mode>_p"
1558   [(set (reg:CC CR6_REGNO)
1559         (unspec:CC
1560          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1561                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1562          UNSPEC_PREDICATE))
1563    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1564         (gt:VSX_F (match_dup 1)
1565                   (match_dup 2)))]
1566   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1567   "xvcmpgt<VSs>. %x0,%x1,%x2"
1568   [(set_attr "type" "<VStype_simple>")])
1569
1570 (define_insn "*vsx_ge_<mode>_p"
1571   [(set (reg:CC CR6_REGNO)
1572         (unspec:CC
1573          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1574                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1575          UNSPEC_PREDICATE))
1576    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1577         (ge:VSX_F (match_dup 1)
1578                   (match_dup 2)))]
1579   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1580   "xvcmpge<VSs>. %x0,%x1,%x2"
1581   [(set_attr "type" "<VStype_simple>")])
1582
1583 ;; Vector select
1584 (define_insn "*vsx_xxsel<mode>"
     ;; Vector select keyed on a CC-mode "operand 1 != 0" test: the RTL picks
     ;; operand 2 when the test holds and operand 3 otherwise.  Operand 4 only
     ;; matches the zero constant and is not used in the output template.
     ;; Note that the template passes operand 3 before operand 2 to xxsel,
     ;; with the mask (operand 1) last.
1585   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1586         (if_then_else:VSX_L
1587          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1588                 (match_operand:VSX_L 4 "zero_constant" ""))
1589          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1590          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1591   "VECTOR_MEM_VSX_P (<MODE>mode)"
1592   "xxsel %x0,%x3,%x2,%x1"
1593   [(set_attr "type" "vecmove")])
1594
1595 (define_insn "*vsx_xxsel<mode>_uns"
     ;; Same vector select as the CC-mode xxsel pattern, but matching a
     ;; comparison in CCUNS mode.  The RTL picks operand 2 when operand 1 is
     ;; non-zero and operand 3 otherwise; the template passes operand 3 before
     ;; operand 2 to xxsel, with the mask (operand 1) last.
1596   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1597         (if_then_else:VSX_L
1598          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1599                    (match_operand:VSX_L 4 "zero_constant" ""))
1600          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1601          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1602   "VECTOR_MEM_VSX_P (<MODE>mode)"
1603   "xxsel %x0,%x3,%x2,%x1"
1604   [(set_attr "type" "vecmove")])
1605
1606 ;; Copy sign
1607 (define_insn "vsx_copysign<mode>3"
     ;; Vector copysign, represented as UNSPEC_COPYSIGN of operands 1 and 2.
     ;; The output template hands the sources to xvcpsgn in swapped order
     ;; (operand 2 first, then operand 1).
1608   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1609         (unspec:VSX_F
1610          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1611           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1612          UNSPEC_COPYSIGN))]
1613   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1614   "xvcpsgn<VSs> %x0,%x2,%x1"
1615   [(set_attr "type" "<VStype_simple>")
1616    (set_attr "fp_type" "<VSfptype_simple>")])
1617
1618 ;; For the conversions, limit the register class for the integer value to be
1619 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1620 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1621 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1622 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1623 ;; in allowing virtual registers.
1624 (define_insn "vsx_float<VSi><mode>2"
1625   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1626         (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1627   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1628   "xvcvsx<VSc><VSs> %x0,%x1"
1629   [(set_attr "type" "<VStype_simple>")
1630    (set_attr "fp_type" "<VSfptype_simple>")])
1631
1632 (define_insn "vsx_floatuns<VSi><mode>2"
1633   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1634         (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1635   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1636   "xvcvux<VSc><VSs> %x0,%x1"
1637   [(set_attr "type" "<VStype_simple>")
1638    (set_attr "fp_type" "<VSfptype_simple>")])
1639
1640 (define_insn "vsx_fix_trunc<mode><VSi>2"
1641   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1642         (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1643   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1644   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1645   [(set_attr "type" "<VStype_simple>")
1646    (set_attr "fp_type" "<VSfptype_simple>")])
1647
1648 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1649   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1650         (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1651   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1652   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1653   [(set_attr "type" "<VStype_simple>")
1654    (set_attr "fp_type" "<VSfptype_simple>")])
1655
1656 ;; Math rounding functions
1657 (define_insn "vsx_x<VSv>r<VSs>i"
1658   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1659         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1660                       UNSPEC_VSX_ROUND_I))]
1661   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1662   "x<VSv>r<VSs>i %x0,%x1"
1663   [(set_attr "type" "<VStype_simple>")
1664    (set_attr "fp_type" "<VSfptype_simple>")])
1665
1666 (define_insn "vsx_x<VSv>r<VSs>ic"
1667   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1668         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1669                       UNSPEC_VSX_ROUND_IC))]
1670   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1671   "x<VSv>r<VSs>ic %x0,%x1"
1672   [(set_attr "type" "<VStype_simple>")
1673    (set_attr "fp_type" "<VSfptype_simple>")])
1674
1675 (define_insn "vsx_btrunc<mode>2"
1676   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1677         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1678   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1679   "xvr<VSs>iz %x0,%x1"
1680   [(set_attr "type" "<VStype_simple>")
1681    (set_attr "fp_type" "<VSfptype_simple>")])
1682
1683 (define_insn "*vsx_b2trunc<mode>2"
1684   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1685         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1686                       UNSPEC_FRIZ))]
1687   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1688   "x<VSv>r<VSs>iz %x0,%x1"
1689   [(set_attr "type" "<VStype_simple>")
1690    (set_attr "fp_type" "<VSfptype_simple>")])
1691
1692 (define_insn "vsx_floor<mode>2"
1693   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1694         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1695                       UNSPEC_FRIM))]
1696   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1697   "xvr<VSs>im %x0,%x1"
1698   [(set_attr "type" "<VStype_simple>")
1699    (set_attr "fp_type" "<VSfptype_simple>")])
1700
1701 (define_insn "vsx_ceil<mode>2"
1702   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1703         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1704                       UNSPEC_FRIP))]
1705   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1706   "xvr<VSs>ip %x0,%x1"
1707   [(set_attr "type" "<VStype_simple>")
1708    (set_attr "fp_type" "<VSfptype_simple>")])
1709
1710 \f
1711 ;; VSX convert to/from double vector
1712
1713 ;; Convert between single and double precision
1714 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1715 ;; scalar single precision instructions internally use the double format.
1716 ;; Prefer the altivec registers, since we likely will need to do a vperm
1717 (define_insn "vsx_<VS_spdp_insn>"
1718   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1719         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1720                               UNSPEC_VSX_CVSPDP))]
1721   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1722   "<VS_spdp_insn> %x0,%x1"
1723   [(set_attr "type" "<VS_spdp_type>")])
1724
1725 ;; xscvspdp, represent the scalar SF type as V4SF
1726 (define_insn "vsx_xscvspdp"
1727   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1728         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1729                    UNSPEC_VSX_CVSPDP))]
1730   "VECTOR_UNIT_VSX_P (V4SFmode)"
1731   "xscvspdp %x0,%x1"
1732   [(set_attr "type" "fp")])
1733
1734 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1735 ;; format of scalars is actually DF.
1736 (define_insn "vsx_xscvdpsp_scalar"
1737   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1738         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1739                      UNSPEC_VSX_CVSPDP))]
1740   "VECTOR_UNIT_VSX_P (V4SFmode)"
1741   "xscvdpsp %x0,%x1"
1742   [(set_attr "type" "fp")])
1743
1744 ;; Same as vsx_xscvspdp, but use SF as the type
1745 (define_insn "vsx_xscvspdp_scalar2"
1746   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
1747         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1748                    UNSPEC_VSX_CVSPDP))]
1749   "VECTOR_UNIT_VSX_P (V4SFmode)"
1750   "xscvspdp %x0,%x1"
1751   [(set_attr "type" "fp")])
1752
1753 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1754 (define_insn "vsx_xscvdpspn"
1755   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
1756         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1757                      UNSPEC_VSX_CVDPSPN))]
1758   "TARGET_XSCVDPSPN"
1759   "xscvdpspn %x0,%x1"
1760   [(set_attr "type" "fp")])
1761
1762 (define_insn "vsx_xscvspdpn"
1763   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
1764         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
1765                    UNSPEC_VSX_CVSPDPN))]
1766   "TARGET_XSCVSPDPN"
1767   "xscvspdpn %x0,%x1"
1768   [(set_attr "type" "fp")])
1769
1770 (define_insn "vsx_xscvdpspn_scalar"
1771   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
1772         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
1773                      UNSPEC_VSX_CVDPSPN))]
1774   "TARGET_XSCVDPSPN"
1775   "xscvdpspn %x0,%x1"
1776   [(set_attr "type" "fp")])
1777
1778 ;; Used by direct move to move a SFmode value from GPR to VSX register
1779 (define_insn "vsx_xscvspdpn_directmove"
1780   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1781         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1782                    UNSPEC_VSX_CVSPDPN))]
1783   "TARGET_XSCVSPDPN"
1784   "xscvspdpn %x0,%x1"
1785   [(set_attr "type" "fp")])
1786
1787 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
1788
1789 (define_expand "vsx_xvcvsxddp_scale"
     ;; Convert a V2DI vector (operand 1) to V2DF (operand 0) with
     ;; vsx_xvcvsxddp, then apply the immediate scale in operand 2.
1790   [(match_operand:V2DF 0 "vsx_register_operand" "")
1791    (match_operand:V2DI 1 "vsx_register_operand" "")
1792    (match_operand:QI 2 "immediate_operand" "")]
1793   "VECTOR_UNIT_VSX_P (V2DFmode)"
1794 {
1795   rtx op0 = operands[0];
1796   rtx op1 = operands[1];
1797   int scale = INTVAL(operands[2]);
       /* Convert first; the scaling is applied in place on the result.  */
1798   emit_insn (gen_vsx_xvcvsxddp (op0, op1));
       /* A zero scale needs no adjustment.  Otherwise the negated scale is
          handed to rs6000_scale_v2df.  */
1799   if (scale != 0)
1800     rs6000_scale_v2df (op0, op0, -scale);
1801   DONE;
1802 })
1803
1804 (define_insn "vsx_xvcvsxddp"
1805   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1806         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1807                      UNSPEC_VSX_XVCVSXDDP))]
1808   "VECTOR_UNIT_VSX_P (V2DFmode)"
1809   "xvcvsxddp %x0,%x1"
1810   [(set_attr "type" "vecdouble")])
1811
1812 (define_expand "vsx_xvcvuxddp_scale"
     ;; Convert a V2DI vector (operand 1) to V2DF (operand 0) with
     ;; vsx_xvcvuxddp, then apply the immediate scale in operand 2.
1813   [(match_operand:V2DF 0 "vsx_register_operand" "")
1814    (match_operand:V2DI 1 "vsx_register_operand" "")
1815    (match_operand:QI 2 "immediate_operand" "")]
1816   "VECTOR_UNIT_VSX_P (V2DFmode)"
1817 {
1818   rtx op0 = operands[0];
1819   rtx op1 = operands[1];
1820   int scale = INTVAL(operands[2]);
       /* Convert first; the scaling is applied in place on the result.  */
1821   emit_insn (gen_vsx_xvcvuxddp (op0, op1));
       /* A zero scale needs no adjustment.  Otherwise the negated scale is
          handed to rs6000_scale_v2df.  */
1822   if (scale != 0)
1823     rs6000_scale_v2df (op0, op0, -scale);
1824   DONE;
1825 })
1826
1827 (define_insn "vsx_xvcvuxddp"
1828   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1829         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1830                      UNSPEC_VSX_XVCVUXDDP))]
1831   "VECTOR_UNIT_VSX_P (V2DFmode)"
1832   "xvcvuxddp %x0,%x1"
1833   [(set_attr "type" "vecdouble")])
1834
1835 (define_expand "vsx_xvcvdpsxds_scale"
     ;; Apply the immediate scale in operand 2 to the V2DF input (operand 1),
     ;; then convert the scaled value to V2DI (operand 0) with vsx_xvcvdpsxds.
1836   [(match_operand:V2DI 0 "vsx_register_operand" "")
1837    (match_operand:V2DF 1 "vsx_register_operand" "")
1838    (match_operand:QI 2 "immediate_operand" "")]
1839   "VECTOR_UNIT_VSX_P (V2DFmode)"
1840 {
1841   rtx op0 = operands[0];
1842   rtx op1 = operands[1];
1843   rtx tmp;
1844   int scale = INTVAL (operands[2]);
       /* With a zero scale the input is converted directly; otherwise the
          scaled value goes into a fresh V2DF pseudo so that operand 1 is
          left unmodified.  */
1845   if (scale == 0)
1846     tmp = op1;
1847   else
1848     {
1849       tmp  = gen_reg_rtx (V2DFmode);
1850       rs6000_scale_v2df (tmp, op1, scale);
1851     }
1852   emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
1853   DONE;
1854 })
1855
1856 (define_insn "vsx_xvcvdpsxds"
1857   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1858         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1859                      UNSPEC_VSX_XVCVDPSXDS))]
1860   "VECTOR_UNIT_VSX_P (V2DFmode)"
1861   "xvcvdpsxds %x0,%x1"
1862   [(set_attr "type" "vecdouble")])
1863
1864 (define_expand "vsx_xvcvdpuxds_scale"
     ;; Apply the immediate scale in operand 2 to the V2DF input (operand 1),
     ;; then convert the scaled value to V2DI (operand 0) with vsx_xvcvdpuxds.
1865   [(match_operand:V2DI 0 "vsx_register_operand" "")
1866    (match_operand:V2DF 1 "vsx_register_operand" "")
1867    (match_operand:QI 2 "immediate_operand" "")]
1868   "VECTOR_UNIT_VSX_P (V2DFmode)"
1869 {
1870   rtx op0 = operands[0];
1871   rtx op1 = operands[1];
1872   rtx tmp;
1873   int scale = INTVAL (operands[2]);
       /* With a zero scale the input is converted directly; otherwise the
          scaled value goes into a fresh V2DF pseudo so that operand 1 is
          left unmodified.  */
1874   if (scale == 0)
1875     tmp = op1;
1876   else
1877     {
1878       tmp = gen_reg_rtx (V2DFmode);
1879       rs6000_scale_v2df (tmp, op1, scale);
1880     }
1881   emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
1882   DONE;
1883 })
1884
1885 (define_insn "vsx_xvcvdpuxds"
1886   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1887         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1888                      UNSPEC_VSX_XVCVDPUXDS))]
1889   "VECTOR_UNIT_VSX_P (V2DFmode)"
1890   "xvcvdpuxds %x0,%x1"
1891   [(set_attr "type" "vecdouble")])
1892
1893 ;; Convert from 64-bit to 32-bit types
1894 ;; Note, favor the Altivec registers since the usual use of these instructions
1895 ;; is in vector converts and we need to use the Altivec vperm instruction.
1896
1897 (define_insn "vsx_xvcvdpsxws"
1898   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1899         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1900                      UNSPEC_VSX_CVDPSXWS))]
1901   "VECTOR_UNIT_VSX_P (V2DFmode)"
1902   "xvcvdpsxws %x0,%x1"
1903   [(set_attr "type" "vecdouble")])
1904
1905 (define_insn "vsx_xvcvdpuxws"
1906   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1907         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1908                      UNSPEC_VSX_CVDPUXWS))]
1909   "VECTOR_UNIT_VSX_P (V2DFmode)"
1910   "xvcvdpuxws %x0,%x1"
1911   [(set_attr "type" "vecdouble")])
1912
1913 (define_insn "vsx_xvcvsxdsp"
     ;; V2DI -> V4SF conversion, represented as UNSPEC_VSX_CVSXDSP and emitted
     ;; as xvcvsxdsp.
     ;; NOTE(review): the "type" attribute is "vecfloat" here, while the
     ;; otherwise parallel vsx_xvcvuxdsp pattern uses "vecdouble" -- confirm
     ;; which scheduling type is intended.
1914   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
1915         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
1916                      UNSPEC_VSX_CVSXDSP))]
1917   "VECTOR_UNIT_VSX_P (V2DFmode)"
1918   "xvcvsxdsp %x0,%x1"
1919   [(set_attr "type" "vecfloat")])
1920
1921 (define_insn "vsx_xvcvuxdsp"
     ;; V2DI -> V4SF conversion, represented as UNSPEC_VSX_CVUXDSP and emitted
     ;; as xvcvuxdsp.
     ;; NOTE(review): the "type" attribute is "vecdouble" here, while the
     ;; otherwise parallel vsx_xvcvsxdsp pattern uses "vecfloat" -- confirm
     ;; which scheduling type is intended.
1922   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
1923         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
1924                      UNSPEC_VSX_CVUXDSP))]
1925   "VECTOR_UNIT_VSX_P (V2DFmode)"
1926   "xvcvuxdsp %x0,%x1"
1927   [(set_attr "type" "vecdouble")])
1928
1929 ;; Convert from 32-bit to 64-bit types
1930 ;; Provide both vector and scalar targets
1931 (define_insn "vsx_xvcvsxwdp"
1932   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1933         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1934                      UNSPEC_VSX_CVSXWDP))]
1935   "VECTOR_UNIT_VSX_P (V2DFmode)"
1936   "xvcvsxwdp %x0,%x1"
1937   [(set_attr "type" "vecdouble")])
1938
1939 (define_insn "vsx_xvcvsxwdp_df"
1940   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1941         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
1942                    UNSPEC_VSX_CVSXWDP))]
1943   "TARGET_VSX"
1944   "xvcvsxwdp %x0,%x1"
1945   [(set_attr "type" "vecdouble")])
1946
1947 (define_insn "vsx_xvcvuxwdp"
1948   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1949         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1950                      UNSPEC_VSX_CVUXWDP))]
1951   "VECTOR_UNIT_VSX_P (V2DFmode)"
1952   "xvcvuxwdp %x0,%x1"
1953   [(set_attr "type" "vecdouble")])
1954
1955 (define_insn "vsx_xvcvuxwdp_df"
1956   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1957         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
1958                    UNSPEC_VSX_CVUXWDP))]
1959   "TARGET_VSX"
1960   "xvcvuxwdp %x0,%x1"
1961   [(set_attr "type" "vecdouble")])
1962
1963 (define_insn "vsx_xvcvspsxds"
1964   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1965         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1966                      UNSPEC_VSX_CVSPSXDS))]
1967   "VECTOR_UNIT_VSX_P (V2DFmode)"
1968   "xvcvspsxds %x0,%x1"
1969   [(set_attr "type" "vecdouble")])
1970
1971 (define_insn "vsx_xvcvspuxds"
1972   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1973         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1974                      UNSPEC_VSX_CVSPUXDS))]
1975   "VECTOR_UNIT_VSX_P (V2DFmode)"
1976   "xvcvspuxds %x0,%x1"
1977   [(set_attr "type" "vecdouble")])
1978
1979 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode,
1980 ;; since the xvrdpiz instruction does not truncate the value if the floating
1981 ;; point value is < LONG_MIN or > LONG_MAX.
1982 (define_insn "*vsx_float_fix_v2df2"
     ;; Matches a V2DF -> V2DI fix immediately converted back to V2DF and
     ;; emits a single xvrdpiz.  The insn condition restricts this to
     ;; flag_unsafe_math_optimizations with !flag_trapping_math and
     ;; TARGET_FRIZ, in addition to the hard-float/VSX unit checks.
1983   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1984         (float:V2DF
1985          (fix:V2DI
1986           (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
1987   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1988    && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
1989    && !flag_trapping_math && TARGET_FRIZ"
1990   "xvrdpiz %x0,%x1"
1991   [(set_attr "type" "vecdouble")
1992    (set_attr "fp_type" "fp_addsub_d")])
1993
1994 \f
1995 ;; Permute operations
1996
1997 ;; Build a V2DF/V2DI vector from two scalars
1998 (define_insn "vsx_concat_<mode>"
     ;; Concatenate two scalars (operands 1 and 2) into a two-element vector.
     ;; Alternative 0 emits xxpermdi; alternative 1 (destination "we", sources
     ;; "b") emits mtvsrdd.  In both cases the two source operands are
     ;; swapped in the template when !BYTES_BIG_ENDIAN.
1999   [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we")
2000         (vec_concat:VSX_D
2001          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b")
2002          (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))]
2003   "VECTOR_MEM_VSX_P (<MODE>mode)"
2004 {
       /* Alternative 0: both scalars are permuted together with xxpermdi.  */
2005   if (which_alternative == 0)
2006     return (BYTES_BIG_ENDIAN
2007             ? "xxpermdi %x0,%x1,%x2,0"
2008             : "xxpermdi %x0,%x2,%x1,0");
2009
       /* Alternative 1: the sources are printed with %1/%2 (not %x) and
          combined with mtvsrdd.  */
2010   else if (which_alternative == 1)
2011     return (BYTES_BIG_ENDIAN
2012             ? "mtvsrdd %x0,%1,%2"
2013             : "mtvsrdd %x0,%2,%1");
2014
2015   else
2016     gcc_unreachable ();
2017 }
2018   [(set_attr "type" "vecperm")])
2019
2020 ;; Special purpose concat using xxpermdi to glue two single precision values
2021 ;; together, relying on the fact that internally scalar floats are represented
2022 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
2023 (define_insn "vsx_concat_v2sf"
     ;; Glue two SF scalars (operands 1 and 2) into a V2DF result with
     ;; xxpermdi, represented as UNSPEC_VSX_CONCAT.  The source operands are
     ;; swapped in the template when !BYTES_BIG_ENDIAN.
2024   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2025         (unspec:V2DF
2026          [(match_operand:SF 1 "vsx_register_operand" "ww")
2027           (match_operand:SF 2 "vsx_register_operand" "ww")]
2028          UNSPEC_VSX_CONCAT))]
2029   "VECTOR_MEM_VSX_P (V2DFmode)"
2030 {
2031   if (BYTES_BIG_ENDIAN)
2032     return "xxpermdi %x0,%x1,%x2,0";
2033   else
2034     return "xxpermdi %x0,%x2,%x1,0";
2035 }
2036   [(set_attr "type" "vecperm")])
2037
2038 ;; V4SImode initialization splitter
2039 (define_insn_and_split "vsx_init_v4si"
     ;; Build a V4SI value from four SI operands (each a register or a
     ;; constant integer), represented as UNSPEC_VSX_VEC_INIT.  The insn is
     ;; never output directly ("#"); once reload has completed it is split by
     ;; rs6000_split_v4si_init, which receives the full operand array
     ;; including the two DImode scratch registers (operands 5 and 6).
2040   [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2041         (unspec:V4SI
2042          [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2043           (match_operand:SI 2 "reg_or_cint_operand" "rn")
2044           (match_operand:SI 3 "reg_or_cint_operand" "rn")
2045           (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2046          UNSPEC_VSX_VEC_INIT))
2047    (clobber (match_scratch:DI 5 "=&r"))
2048    (clobber (match_scratch:DI 6 "=&r"))]
2049    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2050    "#"
2051    "&& reload_completed"
2052    [(const_int 0)]
2053 {
2054   rs6000_split_v4si_init (operands);
2055   DONE;
2056 })
2057
2058 ;; xxpermdi for little endian loads and stores.  We need several of
2059 ;; these since the form of the PARALLEL differs by mode.
2060 (define_insn "*vsx_xxpermdi2_le_<mode>"
2061   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2062         (vec_select:VSX_D
2063           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2064           (parallel [(const_int 1) (const_int 0)])))]
2065   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2066   "xxpermdi %x0,%x1,%x1,2"
2067   [(set_attr "type" "vecperm")])
2068
2069 (define_insn "*vsx_xxpermdi4_le_<mode>"
2070   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2071         (vec_select:VSX_W
2072           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2073           (parallel [(const_int 2) (const_int 3)
2074                      (const_int 0) (const_int 1)])))]
2075   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2076   "xxpermdi %x0,%x1,%x1,2"
2077   [(set_attr "type" "vecperm")])
2078
2079 (define_insn "*vsx_xxpermdi8_le_V8HI"
2080   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2081         (vec_select:V8HI
2082           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2083           (parallel [(const_int 4) (const_int 5)
2084                      (const_int 6) (const_int 7)
2085                      (const_int 0) (const_int 1)
2086                      (const_int 2) (const_int 3)])))]
2087   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2088   "xxpermdi %x0,%x1,%x1,2"
2089   [(set_attr "type" "vecperm")])
2090
2091 (define_insn "*vsx_xxpermdi16_le_V16QI"
2092   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2093         (vec_select:V16QI
2094           (match_operand:V16QI 1 "vsx_register_operand" "wa")
2095           (parallel [(const_int 8) (const_int 9)
2096                      (const_int 10) (const_int 11)
2097                      (const_int 12) (const_int 13)
2098                      (const_int 14) (const_int 15)
2099                      (const_int 0) (const_int 1)
2100                      (const_int 2) (const_int 3)
2101                      (const_int 4) (const_int 5)
2102                      (const_int 6) (const_int 7)])))]
2103   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
2104   "xxpermdi %x0,%x1,%x1,2"
2105   [(set_attr "type" "vecperm")])
2106
2107 ;; lxvd2x for little endian loads.  We need several of
2108 ;; these since the form of the PARALLEL differs by mode.
2109 (define_insn "*vsx_lxvd2x2_le_<mode>"
2110   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2111         (vec_select:VSX_D
2112           (match_operand:VSX_D 1 "memory_operand" "Z")
2113           (parallel [(const_int 1) (const_int 0)])))]
2114   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2115   "lxvd2x %x0,%y1"
2116   [(set_attr "type" "vecload")])
2117
2118 (define_insn "*vsx_lxvd2x4_le_<mode>"
2119   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2120         (vec_select:VSX_W
2121           (match_operand:VSX_W 1 "memory_operand" "Z")
2122           (parallel [(const_int 2) (const_int 3)
2123                      (const_int 0) (const_int 1)])))]
2124   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2125   "lxvd2x %x0,%y1"
2126   [(set_attr "type" "vecload")])
2127
2128 (define_insn "*vsx_lxvd2x8_le_V8HI"
2129   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2130         (vec_select:V8HI
2131           (match_operand:V8HI 1 "memory_operand" "Z")
2132           (parallel [(const_int 4) (const_int 5)
2133                      (const_int 6) (const_int 7)
2134                      (const_int 0) (const_int 1)
2135                      (const_int 2) (const_int 3)])))]
2136   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2137   "lxvd2x %x0,%y1"
2138   [(set_attr "type" "vecload")])
2139
2140 (define_insn "*vsx_lxvd2x16_le_V16QI"
2141   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2142         (vec_select:V16QI
2143           (match_operand:V16QI 1 "memory_operand" "Z")
2144           (parallel [(const_int 8) (const_int 9)
2145                      (const_int 10) (const_int 11)
2146                      (const_int 12) (const_int 13)
2147                      (const_int 14) (const_int 15)
2148                      (const_int 0) (const_int 1)
2149                      (const_int 2) (const_int 3)
2150                      (const_int 4) (const_int 5)
2151                      (const_int 6) (const_int 7)])))]
2152   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2153   "lxvd2x %x0,%y1"
2154   [(set_attr "type" "vecload")])
2155
2156 ;; stxvd2x for little endian stores.  We need several of
2157 ;; these since the form of the PARALLEL differs by mode.
2158 (define_insn "*vsx_stxvd2x2_le_<mode>"
2159   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
2160         (vec_select:VSX_D
2161           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2162           (parallel [(const_int 1) (const_int 0)])))]
2163   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2164   "stxvd2x %x1,%y0"
2165   [(set_attr "type" "vecstore")])
2166
2167 (define_insn "*vsx_stxvd2x4_le_<mode>"
2168   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
2169         (vec_select:VSX_W
2170           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2171           (parallel [(const_int 2) (const_int 3)
2172                      (const_int 0) (const_int 1)])))]
2173   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2174   "stxvd2x %x1,%y0"
2175   [(set_attr "type" "vecstore")])
2176
2177 (define_insn "*vsx_stxvd2x8_le_V8HI"
2178   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
2179         (vec_select:V8HI
2180           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2181           (parallel [(const_int 4) (const_int 5)
2182                      (const_int 6) (const_int 7)
2183                      (const_int 0) (const_int 1)
2184                      (const_int 2) (const_int 3)])))]
2185   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2186   "stxvd2x %x1,%y0"
2187   [(set_attr "type" "vecstore")])
2188
2189 (define_insn "*vsx_stxvd2x16_le_V16QI"
2190   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
2191         (vec_select:V16QI
2192           (match_operand:V16QI 1 "vsx_register_operand" "wa")
2193           (parallel [(const_int 8) (const_int 9)
2194                      (const_int 10) (const_int 11)
2195                      (const_int 12) (const_int 13)
2196                      (const_int 14) (const_int 15)
2197                      (const_int 0) (const_int 1)
2198                      (const_int 2) (const_int 3)
2199                      (const_int 4) (const_int 5)
2200                      (const_int 6) (const_int 7)])))]
2201   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2202   "stxvd2x %x1,%y0"
2203   [(set_attr "type" "vecstore")])
2204
2205 ;; Convert a TImode value into V1TImode
     ;; Operand 0 is the V1TI destination, operand 1 the original V1TI vector,
     ;; operand 2 the TImode value to insert and operand 3 the element index.
     ;; A V1TI vector has a single element, so the index must be 0 and the
     ;; inserted value replaces the whole vector.
2206 (define_expand "vsx_set_v1ti"
2207   [(match_operand:V1TI 0 "nonimmediate_operand" "")
2208    (match_operand:V1TI 1 "nonimmediate_operand" "")
2209    (match_operand:TI 2 "input_operand" "")
2210    (match_operand:QI 3 "u5bit_cint_operand" "")]
2211   "VECTOR_MEM_VSX_P (V1TImode)"
2212 {
2213   if (operands[3] != const0_rtx)
2214     gcc_unreachable ();
2215
       /* Move the new TImode value (operand 2), viewed as V1TImode, into the
          destination.  Operand 1 is not needed because every bit of the old
          vector is overwritten.  */
2216   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
2217   DONE;
2218 })
2219
2220 ;; Set the element of a V2DI/V2DF mode
2221 (define_insn "vsx_set_<mode>"
     ;; Replace one element of the two-element vector in operand 1 with the
     ;; scalar in operand 2; operand 3 is the constant element index.  The
     ;; operation is represented as UNSPEC_VSX_SET and emitted as a single
     ;; xxpermdi whose operand order and selector depend on the index and on
     ;; BYTES_BIG_ENDIAN.
2222   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
2223         (unspec:VSX_D
2224          [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
2225           (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
2226           (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
2227          UNSPEC_VSX_SET))]
2228   "VECTOR_MEM_VSX_P (<MODE>mode)"
2229 {
       /* idx_first is the element index that selects the first template
          below: 0 on big endian, 1 on little endian.  */
2230   int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
2231   if (INTVAL (operands[3]) == idx_first)
2232     return \"xxpermdi %x0,%x2,%x1,1\";
2233   else if (INTVAL (operands[3]) == 1 - idx_first)
2234     return \"xxpermdi %x0,%x1,%x2,0\";
       /* Only indices 0 and 1 are handled.  */
2235   else
2236     gcc_unreachable ();
2237 }
2238   [(set_attr "type" "vecperm")])
2239
2240 ;; Extract a DF/DI element from V2DF/V2DI
2241 ;; Optimize cases where we can do a simple or direct move.
2242 ;; Or see if we can avoid doing the move at all
2243
2244 ;; There are some unresolved problems with reload that show up if an Altivec
2245 ;; register was picked.  Limit the scalar value to FPRs for now.
2246
2247 (define_insn "vsx_extract_<mode>"
     ;; Alternatives, in order:
     ;;   0: "d" destination, element constrained by wD -> register copy
     ;;      (fmr/xxlor) or nothing when source and destination coincide.
     ;;   1: "d" destination, any element (n)           -> xxpermdi.
     ;;   2: "wr" destination, element constrained by wD -> mfvsrd.
     ;;   3: "wr" destination, any element (n)           -> mfvsrld.
2248   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
2249
2250         (vec_select:<VS_scalar>
2251          (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
2252
2253          (parallel
2254           [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
2255   "VECTOR_MEM_VSX_P (<MODE>mode)"
2256 {
2257   int element = INTVAL (operands[2]);
2258   int op0_regno = REGNO (operands[0]);
2259   int op1_regno = REGNO (operands[1]);
2260   int fldDM;
2261
2262   gcc_assert (IN_RANGE (element, 0, 1));
2263   gcc_assert (VSX_REGNO_P (op1_regno));
2264
       /* The requested element is the one compared against
          VECTOR_ELEMENT_SCALAR_64BIT: a plain register copy (or no
          instruction at all) is enough.  */
2265   if (element == VECTOR_ELEMENT_SCALAR_64BIT)
2266     {
2267       if (op0_regno == op1_regno)
2268         return ASM_COMMENT_START " vec_extract to same register";
2269
2270       else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
2271                && TARGET_POWERPC64)
2272         return "mfvsrd %0,%x1";
2273
2274       else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
2275         return "fmr %0,%1";
2276
2277       else if (VSX_REGNO_P (op0_regno))
2278         return "xxlor %x0,%x1,%x1";
2279
2280       else
2281         gcc_unreachable ();
2282     }
2283
       /* The other element, moved straight to a GPR with mfvsrld.  */
2284   else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
2285            && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
2286     return "mfvsrld %0,%x1";
2287
       /* Otherwise permute within the VSX registers.  The xxpermdi selector
          is derived from the element number and mirrored on little endian;
          operands[3] is filled in here only for the output template.  */
2288   else if (VSX_REGNO_P (op0_regno))
2289     {
2290       fldDM = element << 1;
2291       if (!BYTES_BIG_ENDIAN)
2292         fldDM = 3 - fldDM;
2293       operands[3] = GEN_INT (fldDM);
2294       return "xxpermdi %x0,%x1,%x1,%3";
2295     }
2296
2297   else
2298     gcc_unreachable ();
2299 }
     ;; One type per alternative, in alternative order: the register copy is
     ;; veclogical, the xxpermdi alternative is vecperm, and the two GPR
     ;; alternatives (mfvsrd, mfvsrld) are mftgpr.
2300   [(set_attr "type" "veclogical,vecperm,mftgpr,mftgpr")])
2301
2302 ;; Optimize extracting a single scalar element from memory.
     ;; The insn is output as "#" and split once reload has completed.  The
     ;; split replaces the vec_select with a plain scalar load from the memory
     ;; reference returned by rs6000_adjust_vec_address, which is passed the
     ;; pointer-mode scratch (operand 3, a base register) along with the
     ;; destination, the vector memory and the element number.
2303 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
2304   [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
2305         (vec_select:<VSX_D:VS_scalar>
2306          (match_operand:VSX_D 1 "memory_operand" "m,m")
2307          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
2308    (clobber (match_scratch:P 3 "=&b,&b"))]
2309   "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
2310   "#"
2311   "&& reload_completed"
2312   [(set (match_dup 0) (match_dup 4))]
2313 {
       /* Operand 4 is the scalar-mode memory reference for the element.  */
2314   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2315                                            operands[3], <VSX_D:VS_scalar>mode);
2316 }
2317   [(set_attr "type" "fpload,load")
2318    (set_attr "length" "8")])
2319
2320 ;; Optimize storing a single scalar element that is the right location to
2321 ;; memory
     ;; Operand 2 is restricted by the vsx_scalar_64bit predicate and the wD
     ;; constraint, so the pattern only matches the element that can be stored
     ;; directly from the vector register with a scalar store.  The three
     ;; alternatives emit stfd, stxsdx and stxsd respectively.
2322 (define_insn "*vsx_extract_<mode>_store"
2323   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
2324         (vec_select:<VS_scalar>
2325          (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
2326          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2327   "VECTOR_MEM_VSX_P (<MODE>mode)"
2328   "@
2329    stfd%U0%X0 %1,%0
2330    stxsd%U0x %x1,%y0
2331    stxsd %1,%0"
2332   [(set_attr "type" "fpstore")
2333    (set_attr "length" "4")])
2334
2335 ;; Variable V2DI/V2DF extract shift
     ;; Emits a vslo of the vector (operand 1) by the V2DI shift control in
     ;; operand 2.  All operands are Altivec registers and the result is given
     ;; the vector's scalar mode.  Only available with
     ;; TARGET_DIRECT_MOVE_64BIT.
2336 (define_insn "vsx_vslo_<mode>"
2337   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
2338         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
2339                              (match_operand:V2DI 2 "gpc_reg_operand" "v")]
2340                             UNSPEC_VSX_VSLO))]
2341   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2342   "vslo %0,%1,%2"
2343   [(set_attr "type" "vecperm")])
2344
2345 ;; Variable V2DI/V2DF extract
     ;; The element number is a run-time value in a DImode register (operand 2).
     ;; Alternative 0 takes the vector in an Altivec register; alternatives 1
     ;; and 2 take it from memory.  The insn is output as "#" and, after reload,
     ;; all code generation is delegated to rs6000_split_vec_extract_var, which
     ;; receives the DImode scratch (operand 3) and the V2DI scratch
     ;; (operand 4; unused "X" for the memory alternatives).
2346 (define_insn_and_split "vsx_extract_<mode>_var"
2347   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
2348         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
2349                              (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2350                             UNSPEC_VSX_EXTRACT))
2351    (clobber (match_scratch:DI 3 "=r,&b,&b"))
2352    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2353   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2354   "#"
2355   "&& reload_completed"
2356   [(const_int 0)]
2357 {
2358   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2359                                 operands[3], operands[4]);
2360   DONE;
2361 })
2362
2363 ;; Extract a SF element from V4SF
     ;; Output as "#" and split unconditionally ("&& 1").  The split brings the
     ;; requested word into word 0 (when it is not there already) and then
     ;; converts it with vsx_xscvspdp_scalar2.  The V4SF scratch (operand 3) is
     ;; tied to the destination register by the "0" constraint.
2364 (define_insn_and_split "vsx_extract_v4sf"
2365   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2366         (vec_select:SF
2367          (match_operand:V4SF 1 "vsx_register_operand" "wa")
2368          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
2369    (clobber (match_scratch:V4SF 3 "=0"))]
2370   "VECTOR_UNIT_VSX_P (V4SFmode)"
2371   "#"
2372   "&& 1"
2373   [(const_int 0)]
2374 {
2375   rtx op0 = operands[0];
2376   rtx op1 = operands[1];
2377   rtx op2 = operands[2];
2378   rtx op3 = operands[3];
2379   rtx tmp;
       /* Word position of the element; mirrored (3 - n) on little endian.  */
2380   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
2381
2382   if (ele == 0)
2383     tmp = op1;
2384   else
2385     {
           /* Before reload the scratch has no register yet; allocate a
              pseudo for the shifted vector.  */
2386       if (GET_CODE (op3) == SCRATCH)
2387         op3 = gen_reg_rtx (V4SFmode);
2388       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
2389       tmp = op3;
2390     }
2391   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
2392   DONE;
2393 }
2394   [(set_attr "length" "8")
2395    (set_attr "type" "fp")])
2396
     ;; Extract a constant-numbered SF element from a V4SF vector in memory.
     ;; <mode> in the name comes from the pointer-mode scratch (operand 3).
     ;; The insn is output as "#"; after reload it becomes a scalar SFmode load
     ;; from the memory reference returned by rs6000_adjust_vec_address.
2397 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
2398   [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
2399         (vec_select:SF
2400          (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
2401          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
2402    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
2403   "VECTOR_MEM_VSX_P (V4SFmode)"
2404   "#"
2405   "&& reload_completed"
2406   [(set (match_dup 0) (match_dup 4))]
2407 {
       /* Operand 4 is the SFmode memory reference for the element.  */
2408   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2409                                            operands[3], SFmode);
2410 }
2411   [(set_attr "type" "fpload,fpload,fpload,load")
2412    (set_attr "length" "8")])
2413
2414 ;; Variable V4SF extract
     ;; The element number is a run-time value in a DImode register (operand 2).
     ;; The vector is either in an Altivec register (alternative 0) or in memory
     ;; (alternatives 1 and 2).  The insn is output as "#" and split after
     ;; reload by rs6000_split_vec_extract_var.  In addition to the usual VSX
     ;; and direct-move conditions it requires TARGET_UPPER_REGS_SF.
2415 (define_insn_and_split "vsx_extract_v4sf_var"
2416   [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
2417         (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
2418                     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2419                    UNSPEC_VSX_EXTRACT))
2420    (clobber (match_scratch:DI 3 "=r,&b,&b"))
2421    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2422   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT
2423    && TARGET_UPPER_REGS_SF"
2424   "#"
2425   "&& reload_completed"
2426   [(const_int 0)]
2427 {
2428   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2429                                 operands[3], operands[4]);
2430   DONE;
2431 })
2432
2433 ;; Expand the builtin form of xxpermdi to canonical rtl.
     ;; Operand 3 is the xxpermdi immediate.  It is decomposed into the two
     ;; vec_select indices expected by vsx_xxpermdi2_<mode>_1, and vector modes
     ;; other than V2DF and V2DI are handled by viewing all operands as V2DI.
2434 (define_expand "vsx_xxpermdi_<mode>"
2435   [(match_operand:VSX_L 0 "vsx_register_operand")
2436    (match_operand:VSX_L 1 "vsx_register_operand")
2437    (match_operand:VSX_L 2 "vsx_register_operand")
2438    (match_operand:QI 3 "u5bit_cint_operand")]
2439   "VECTOR_MEM_VSX_P (<MODE>mode)"
2440 {
2441   rtx target = operands[0];
2442   rtx op0 = operands[1];
2443   rtx op1 = operands[2];
2444   int mask = INTVAL (operands[3]);
       /* Bit 1 of the mask picks doubleword 0 or 1 of the first input; bit 0
          picks a doubleword of the second input, which is index 2 or 3 in the
          concatenation of both inputs.  */
2445   rtx perm0 = GEN_INT ((mask >> 1) & 1);
2446   rtx perm1 = GEN_INT ((mask & 1) + 2);
2447   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2448
2449   if (<MODE>mode == V2DFmode)
2450     gen = gen_vsx_xxpermdi2_v2df_1;
2451   else
2452     {
2453       gen = gen_vsx_xxpermdi2_v2di_1;
2454       if (<MODE>mode != V2DImode)
2455         {
2456           target = gen_lowpart (V2DImode, target);
2457           op0 = gen_lowpart (V2DImode, op0);
2458           op1 = gen_lowpart (V2DImode, op1);
2459         }
2460     }
2461   emit_insn (gen (target, op0, op1, perm0, perm1));
2462   DONE;
2463 })
2464
2465 ;; Special version of xxpermdi that retains big-endian semantics.
     ;; Same operand decomposition as vsx_xxpermdi_<mode>.  On little endian
     ;; the inputs are swapped and the selectors are swapped and mirrored
     ;; (3 - value) before calling the generator, which undoes the adjustment
     ;; that vsx_xxpermdi2_<mode>_1 applies when it outputs the instruction.
2466 (define_expand "vsx_xxpermdi_<mode>_be"
2467   [(match_operand:VSX_L 0 "vsx_register_operand")
2468    (match_operand:VSX_L 1 "vsx_register_operand")
2469    (match_operand:VSX_L 2 "vsx_register_operand")
2470    (match_operand:QI 3 "u5bit_cint_operand")]
2471   "VECTOR_MEM_VSX_P (<MODE>mode)"
2472 {
2473   rtx target = operands[0];
2474   rtx op0 = operands[1];
2475   rtx op1 = operands[2];
2476   int mask = INTVAL (operands[3]);
       /* Bit 1 of the mask selects within the first input (index 0 or 1),
          bit 0 within the second input (index 2 or 3).  */
2477   rtx perm0 = GEN_INT ((mask >> 1) & 1);
2478   rtx perm1 = GEN_INT ((mask & 1) + 2);
2479   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2480
2481   if (<MODE>mode == V2DFmode)
2482     gen = gen_vsx_xxpermdi2_v2df_1;
2483   else
2484     {
2485       gen = gen_vsx_xxpermdi2_v2di_1;
2486       if (<MODE>mode != V2DImode)
2487         {
2488           target = gen_lowpart (V2DImode, target);
2489           op0 = gen_lowpart (V2DImode, op0);
2490           op1 = gen_lowpart (V2DImode, op1);
2491         }
2492     }
2493   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2494      transformation we don't want; it is necessary for
2495      rs6000_expand_vec_perm_const_1 but not for this use.  So we
2496      prepare for that by reversing the transformation here.  */
2497   if (BYTES_BIG_ENDIAN)
2498     emit_insn (gen (target, op0, op1, perm0, perm1));
2499   else
2500     {
2501       rtx p0 = GEN_INT (3 - INTVAL (perm1));
2502       rtx p1 = GEN_INT (3 - INTVAL (perm0));
2503       emit_insn (gen (target, op1, op0, p0, p1));
2504     }
2505   DONE;
2506 })
2507
     ;; Canonical rtl form of xxpermdi: select one element from each of the two
     ;; inputs of a vec_concat.  Operand 3 (0 or 1) indexes the first input and
     ;; operand 4 (2 or 3) the second.  When the instruction is output,
     ;; operand 3 is overwritten with the computed xxpermdi immediate.
2508 (define_insn "vsx_xxpermdi2_<mode>_1"
2509   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
2510         (vec_select:VSX_D
2511           (vec_concat:<VS_double>
2512             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
2513             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
2514           (parallel [(match_operand 3 "const_0_to_1_operand" "")
2515                      (match_operand 4 "const_2_to_3_operand" "")])))]
2516   "VECTOR_MEM_VSX_P (<MODE>mode)"
2517 {
2518   int op3, op4, mask;
2519
2520   /* For little endian, swap operands and invert/swap selectors
2521      to get the correct xxpermdi.  The operand swap sets up the
2522      inputs as a little endian array.  The selectors are swapped
2523      because they are defined to use big endian ordering.  The
2524      selectors are inverted to get the correct doublewords for
2525      little endian ordering.  */
2526   if (BYTES_BIG_ENDIAN)
2527     {
2528       op3 = INTVAL (operands[3]);
2529       op4 = INTVAL (operands[4]);
2530     }
2531   else
2532     {
2533       op3 = 3 - INTVAL (operands[4]);
2534       op4 = 3 - INTVAL (operands[3]);
2535     }
2536
       /* Pack the two selectors into the 2-bit immediate: first selector in
          bit 1, second selector (rebased from 2..3 to 0..1) in bit 0.  */
2537   mask = (op3 << 1) | (op4 - 2);
2538   operands[3] = GEN_INT (mask);
2539
2540   if (BYTES_BIG_ENDIAN)
2541     return "xxpermdi %x0,%x1,%x2,%3";
2542   else
2543     return "xxpermdi %x0,%x2,%x1,%3";
2544 }
2545   [(set_attr "type" "vecperm")])
2546
     ;; Constant permute of the two-element vector modes.  Operand 3 is the
     ;; V2DI selector.  The work is delegated to rs6000_expand_vec_perm_const;
     ;; the expander FAILs when that function returns false.
2547 (define_expand "vec_perm_const<mode>"
2548   [(match_operand:VSX_D 0 "vsx_register_operand" "")
2549    (match_operand:VSX_D 1 "vsx_register_operand" "")
2550    (match_operand:VSX_D 2 "vsx_register_operand" "")
2551    (match_operand:V2DI  3 "" "")]
2552   "VECTOR_MEM_VSX_P (<MODE>mode)"
2553 {
2554   if (rs6000_expand_vec_perm_const (operands))
2555     DONE;
2556   else
2557     FAIL;
2558 })
2559
2560 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
2561 ;; none of the small types were allowed in a vector register, so we had to
2562 ;; extract to a DImode and either do a direct move or store.
     ;; When the ISA 3.0 path below is not taken, the preparation code falls
     ;; through without DONE and the parallel in the pattern (the vec_select
     ;; plus the vector-mode scratch clobber) is emitted as written.
2563 (define_expand  "vsx_extract_<mode>"
2564   [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
2565                    (vec_select:<VS_scalar>
2566                     (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
2567                     (parallel [(match_operand:QI 2 "const_int_operand")])))
2568               (clobber (match_scratch:VSX_EXTRACT_I 3))])]
2569   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2570 {
2571   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
2572   if (TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR)
2573     {
2574       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
2575                                             operands[2]));
2576       DONE;
2577     }
2578 })
2579
     ;; ISA 3.0 extraction of a small integer element with a constant element
     ;; number.  Alternative 0 (GPR destination) is output as "#" and left to a
     ;; later split; it is the only alternative that needs the SImode scratch.
     ;; Alternative 1 (vector register destination) emits xxextractuw or
     ;; vextractu<wd> with the byte offset of the element.
2580 (define_insn "vsx_extract_<mode>_p9"
2581   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
2582         (vec_select:<VS_scalar>
2583          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
2584          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
2585    (clobber (match_scratch:SI 3 "=r,X"))]
2586   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2587    && TARGET_VSX_SMALL_INTEGER"
2588 {
2589   if (which_alternative == 0)
2590     return "#";
2591
2592   else
2593     {
           /* Mirror the element number when element order is not big
              endian, then scale it to a byte offset; operand 2 is replaced
              by that offset for the output template.  */
2594       HOST_WIDE_INT elt = INTVAL (operands[2]);
2595       HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
2596                                ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
2597                                : elt);
2598
2599       HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
2600       HOST_WIDE_INT offset = unit_size * elt_adj;
2601
2602       operands[2] = GEN_INT (offset);
2603       if (unit_size == 4)
2604         return "xxextractuw %x0,%x1,%2";
2605       else
2606         return "vextractu<wd> %0,%1,%2";
2607     }
2608 }
2609   [(set_attr "type" "vecsimple")])
2610
     ;; Post-reload split for extracting a small integer element from an
     ;; Altivec register into a GPR.  The byte offset of the element is loaded
     ;; into the SImode scratch (operand 3) and the element is fetched with
     ;; vextu<wd>lx or vextu<wd>rx, chosen by the vector element order.  Unlike
     ;; the constant-offset form in vsx_extract_<mode>_p9, the element number
     ;; is not mirrored here; the lx/rx choice accounts for the order instead.
2611 (define_split
2612   [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
2613         (vec_select:<VS_scalar>
2614          (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
2615          (parallel [(match_operand:QI 2 "const_int_operand")])))
2616    (clobber (match_operand:SI 3 "int_reg_operand"))]
2617   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2618    && TARGET_VSX_SMALL_INTEGER && reload_completed"
2619   [(const_int 0)]
2620 {
       /* The extract patterns write an SImode result, so view the
          destination hard register as SImode.  */
2621   rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
2622   rtx op1 = operands[1];
2623   rtx op2 = operands[2];
2624   rtx op3 = operands[3];
2625   HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
2626
2627   emit_move_insn (op3, GEN_INT (offset));
2628   if (VECTOR_ELT_ORDER_BIG)
2629     emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
2630   else
2631     emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
2632   DONE;
2633 })
2634
2635 ;; Optimize zero extracts to eliminate the AND after the extract.
     ;; Matches a zero_extend to DImode of the element extract.  After reload
     ;; the split drops the zero_extend and re-emits the plain extract (with
     ;; its scratch clobber), writing to the destination hard register viewed
     ;; in the element's scalar mode.
2636 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
2637   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
2638         (zero_extend:DI
2639          (vec_select:<VS_scalar>
2640           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
2641           (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
2642    (clobber (match_scratch:SI 3 "=r,X"))]
2643   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2644    && TARGET_VSX_SMALL_INTEGER"
2645   "#"
2646   "&& reload_completed"
2647   [(parallel [(set (match_dup 4)
2648                    (vec_select:<VS_scalar>
2649                     (match_dup 1)
2650                     (parallel [(match_dup 2)])))
2651               (clobber (match_dup 3))])]
2652 {
       /* Operand 4 is operand 0's hard register in the scalar mode.  */
2653   operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
2654 })
2655
2656 ;; Optimize stores to use the ISA 3.0 scalar store instructions
     ;; Matches an element extract whose destination is memory.  After reload
     ;; it is split in two: extract the element into the scalar-mode scratch
     ;; (operand 3), then store that scratch to the memory destination.  The
     ;; second alternative extracts through a GPR and therefore also needs the
     ;; SImode scratch (operand 4).
2657 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
2658   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
2659         (vec_select:<VS_scalar>
2660          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
2661          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
2662    (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
2663    (clobber (match_scratch:SI 4 "=X,&r"))]
2664   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2665    && TARGET_VSX_SMALL_INTEGER"
2666   "#"
2667   "&& reload_completed"
2668   [(parallel [(set (match_dup 3)
2669                    (vec_select:<VS_scalar>
2670                     (match_dup 1)
2671                     (parallel [(match_dup 2)])))
2672               (clobber (match_dup 4))])
2673    (set (match_dup 0)
2674         (match_dup 3))])
2675
     ;; Extract an SImode element from V4SI when the ISA 3.0 small-integer
     ;; extract is not enabled (see the insn condition).  The destination may be
     ;; a GPR, a register accepted by the wH/wI constraints, or memory.  After
     ;; reload the split splats the element into the V4SI scratch unless it is
     ;; already in word 1, and then moves or stores the value from there.
2676 (define_insn_and_split  "*vsx_extract_si"
2677   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
2678         (vec_select:SI
2679          (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
2680          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
2681    (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
2682   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT
2683    && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)"
2684   "#"
2685   "&& reload_completed"
2686   [(const_int 0)]
2687 {
2688   rtx dest = operands[0];
2689   rtx src = operands[1];
2690   rtx element = operands[2];
2691   rtx vec_tmp = operands[3];
2692   int value;
2693
       /* Mirror the element number when element order is not big endian.  */
2694   if (!VECTOR_ELT_ORDER_BIG)
2695     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
2696
2697   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2698      instruction.  */
2699   value = INTVAL (element);
2700   if (value != 1)
2701     {
           /* NOTE(review): the insn condition already excludes
              TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER, so the first arm
              below looks unreachable -- confirm before relying on it.  */
2702       if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER)
2703         {
2704           rtx si_tmp = gen_rtx_REG (SImode, REGNO (vec_tmp));
2705           emit_insn (gen_vsx_extract_v4si_p9 (si_tmp,src, element));
2706         }
2707       else
2708         emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
2709     }
2710   else
2711     vec_tmp = src;
2712
       /* Memory destination: store the word either as an SImode move or
          through stfiwx from the DImode view of the vector register.  */
2713   if (MEM_P (operands[0]))
2714     {
2715       if (can_create_pseudo_p ())
2716         dest = rs6000_address_for_fpconvert (dest);
2717
2718       if (TARGET_VSX_SMALL_INTEGER)
2719         emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
2720       else
2721         emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
2722     }
2723
       /* Register destination: move in SImode when small integers are
          allowed in vector registers, otherwise move the DImode views.  */
2724   else if (TARGET_VSX_SMALL_INTEGER)
2725     emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
2726   else
2727     emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
2728                     gen_rtx_REG (DImode, REGNO (vec_tmp)));
2729
2730   DONE;
2731 }
2732   [(set_attr "type" "mftgpr,vecperm,fpstore")
2733    (set_attr "length" "8")])
2734
     ;; Extract a QImode or HImode element into a GPR when the ISA 3.0
     ;; small-integer extract is not enabled (see the insn condition).  After
     ;; reload the split splats the element into the vector scratch unless it is
     ;; already in the position read by the final move (byte 7 for V16QI,
     ;; halfword 3 for V8HI), and then moves the DImode view of the vector
     ;; register to the DImode view of the destination.
2735 (define_insn_and_split  "*vsx_extract_<mode>_p8"
2736   [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
2737         (vec_select:<VS_scalar>
2738          (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
2739          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
2740    (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
2741   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
2742    && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)"
2743   "#"
2744   "&& reload_completed"
2745   [(const_int 0)]
2746 {
2747   rtx dest = operands[0];
2748   rtx src = operands[1];
2749   rtx element = operands[2];
2750   rtx vec_tmp = operands[3];
2751   int value;
2752
       /* Mirror the element number when element order is not big endian.  */
2753   if (!VECTOR_ELT_ORDER_BIG)
2754     element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
2755
2756   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2757      instruction.  */
2758   value = INTVAL (element);
2759   if (<MODE>mode == V16QImode)
2760     {
2761       if (value != 7)
2762         emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
2763       else
2764         vec_tmp = src;
2765     }
2766   else if (<MODE>mode == V8HImode)
2767     {
2768       if (value != 3)
2769         emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
2770       else
2771         vec_tmp = src;
2772     }
2773   else
2774     gcc_unreachable ();
2775
2776   emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
2777                   gen_rtx_REG (DImode, REGNO (vec_tmp)));
2778   DONE;
2779 }
2780   [(set_attr "type" "mftgpr")])
2781
2782 ;; Optimize extracting a single scalar element from memory.
     ;; Small-integer variant: the element is loaded into a GPR.  The insn is
     ;; output as "#" and, after reload, replaced by a scalar load from the
     ;; memory reference returned by rs6000_adjust_vec_address, which is passed
     ;; the DImode base-register scratch (operand 3).
2783 (define_insn_and_split "*vsx_extract_<mode>_load"
2784   [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
2785         (vec_select:<VS_scalar>
2786          (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
2787          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
2788    (clobber (match_scratch:DI 3 "=&b"))]
2789   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2790   "#"
2791   "&& reload_completed"
2792   [(set (match_dup 0) (match_dup 4))]
2793 {
       /* Operand 4 is the scalar-mode memory reference for the element.  */
2794   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2795                                            operands[3], <VS_scalar>mode);
2796 }
2797   [(set_attr "type" "load")
2798    (set_attr "length" "8")])
2799
2800 ;; Variable V16QI/V8HI/V4SI extract
     ;; The element number is a run-time value in a DImode register (operand 2)
     ;; and the result goes to a GPR.  The vector is in a register for
     ;; alternatives 0 (wK) and 1 (v), and in memory for alternative 2.  Only
     ;; alternative 1 uses the V2DI scratch (operand 4).  The insn is output as
     ;; "#" and split after reload by rs6000_split_vec_extract_var.
2801 (define_insn_and_split "vsx_extract_<mode>_var"
2802   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
2803         (unspec:<VS_scalar>
2804          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
2805           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2806          UNSPEC_VSX_EXTRACT))
2807    (clobber (match_scratch:DI 3 "=r,r,&b"))
2808    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
2809   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2810   "#"
2811   "&& reload_completed"
2812   [(const_int 0)]
2813 {
2814   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2815                                 operands[3], operands[4]);
2816   DONE;
2817 })
2818
     ;; Variable small-integer extract combined with a zero_extend to SImode or
     ;; DImode.  After reload the zero_extend is dropped and the work is done by
     ;; rs6000_split_vec_extract_var, exactly as for vsx_extract_<mode>_var.
     ;; The destination GPR is therefore passed in the element's scalar mode,
     ;; which is the mode of operand 0 in that sibling pattern.
2819 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
2820   [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
2821         (zero_extend:SDI
2822          (unspec:<VSX_EXTRACT_I:VS_scalar>
2823           [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
2824            (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2825           UNSPEC_VSX_EXTRACT)))
2826    (clobber (match_scratch:DI 3 "=r,r,&b"))
2827    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
2828   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2829   "#"
2830   "&& reload_completed"
2831   [(const_int 0)]
2832 {
       /* Mode of one vector element (QI/HI/SI), not the vector mode itself:
          the destination is a GPR holding a single element.  */
2833   machine_mode smode = <VSX_EXTRACT_I:VS_scalar>mode;
2834   rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
2835                                 operands[1], operands[2],
2836                                 operands[3], operands[4]);
2837   DONE;
2838 })
2839
2840 ;; VSX_EXTRACT optimizations
2841 ;; Optimize double d = (double) vec_extract (vi, <n>)
2842 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
     ;; The insn is output as "#" and split unconditionally ("&& 1"), so the
     ;; V4SI scratch may still be a SCRATCH rtx when the split runs; a pseudo
     ;; is allocated in that case.
2843 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
2844   [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
2845         (any_float:DF
2846          (vec_select:SI
2847           (match_operand:V4SI 1 "gpc_reg_operand" "v")
2848           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
2849    (clobber (match_scratch:V4SI 3 "=v"))]
2850   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2851   "#"
2852   "&& 1"
2853   [(const_int 0)]
2854 {
2855   rtx dest = operands[0];
2856   rtx src = operands[1];
2857   rtx element = operands[2];
2858   rtx v4si_tmp = operands[3];
2859   int value;
2860
       /* Mirror the element number when element order is not big endian.  */
2861   if (!VECTOR_ELT_ORDER_BIG)
2862     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
2863
2864   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2865      instruction.  */
2866   value = INTVAL (element);
2867   if (value != 0)
2868     {
2869       if (GET_CODE (v4si_tmp) == SCRATCH)
2870         v4si_tmp = gen_reg_rtx (V4SImode);
2871       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
2872     }
2873   else
2874     v4si_tmp = src;
2875
2876   emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
2877   DONE;
2878 })
2879
2880 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
2881 ;; where <type> is a floating point type that is supported by the hardware
2882 ;; and is not double.  First convert the value to double, and then to the
2883 ;; desired type.
     ;; The insn is output as "#" and split unconditionally ("&& 1"), so both
     ;; scratches may still be SCRATCH rtxes when the split runs; pseudos are
     ;; allocated in that case.
2884 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
2885   [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
2886         (any_float:VSX_EXTRACT_FL
2887          (vec_select:SI
2888           (match_operand:V4SI 1 "gpc_reg_operand" "v")
2889           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
2890    (clobber (match_scratch:V4SI 3 "=v"))
2891    (clobber (match_scratch:DF 4 "=ws"))]
2892   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2893   "#"
2894   "&& 1"
2895   [(const_int 0)]
2896 {
2897   rtx dest = operands[0];
2898   rtx src = operands[1];
2899   rtx element = operands[2];
2900   rtx v4si_tmp = operands[3];
2901   rtx df_tmp = operands[4];
2902   int value;
2903
       /* Mirror the element number when element order is not big endian.  */
2904   if (!VECTOR_ELT_ORDER_BIG)
2905     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
2906
2907   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2908      instruction.  */
2909   value = INTVAL (element);
2910   if (value != 0)
2911     {
2912       if (GET_CODE (v4si_tmp) == SCRATCH)
2913         v4si_tmp = gen_reg_rtx (V4SImode);
2914       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
2915     }
2916   else
2917     v4si_tmp = src;
2918
2919   if (GET_CODE (df_tmp) == SCRATCH)
2920     df_tmp = gen_reg_rtx (DFmode);
2921
       /* First step: integer word to DFmode.  */
2922   emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
2923
       /* Second step: DFmode to the requested mode; truncate for SFmode,
          extend for the 128-bit modes.  */
2924   if (<MODE>mode == SFmode)
2925     emit_insn (gen_truncdfsf2 (dest, df_tmp));
2926   else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
2927     emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
2928   else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
2929            && TARGET_FLOAT128_HW)
2930     emit_insn (gen_extenddftf2_hw (dest, df_tmp));
2931   else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
2932     emit_insn (gen_extenddfif2 (dest, df_tmp));
2933   else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
2934     emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
2935   else
2936     gcc_unreachable ();
2937
2938   DONE;
2939 })
2940
2941 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
2942 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
2943 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
2944 ;; vector short or vector unsigned short.
     ;; Signed form.  After reload the insn is split into three steps: extract
     ;; the element into the scalar-mode scratch (operand 3), sign-extend that
     ;; scratch to DImode in the same hard register (operand 4), and convert the
     ;; DImode value to the floating point mode.
2945 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
2946   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
2947         (float:FL_CONV
2948          (vec_select:<VSX_EXTRACT_I:VS_scalar>
2949           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
2950           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
2951    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
2952   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
2953    && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
2954   "#"
2955   "&& reload_completed"
2956   [(parallel [(set (match_dup 3)
2957                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
2958                     (match_dup 1)
2959                     (parallel [(match_dup 2)])))
2960               (clobber (scratch:SI))])
2961    (set (match_dup 4)
2962         (sign_extend:DI (match_dup 3)))
2963    (set (match_dup 0)
2964         (float:<FL_CONV:MODE> (match_dup 4)))]
2965 {
       /* Operand 4 is the scratch's hard register viewed as DImode.  */
2966   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
2967 })
2968
     ;; Unsigned form of the extract-and-convert pattern.  After reload the
     ;; insn is split into two steps: extract the element into the scalar-mode
     ;; scratch (operand 3), then convert the DImode view of that register
     ;; (operand 4) to the floating point mode.  No explicit extension is
     ;; emitted, and the conversion uses "float" rather than "unsigned_float".
     ;; NOTE(review): presumably this relies on the extract leaving the upper
     ;; bits of the register zero, making the DImode value non-negative --
     ;; confirm against the extract instructions.
2969 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
2970   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
2971         (unsigned_float:FL_CONV
2972          (vec_select:<VSX_EXTRACT_I:VS_scalar>
2973           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
2974           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
2975    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
2976   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
2977    && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
2978   "#"
2979   "&& reload_completed"
2980   [(parallel [(set (match_dup 3)
2981                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
2982                     (match_dup 1)
2983                     (parallel [(match_dup 2)])))
2984               (clobber (scratch:SI))])
2985    (set (match_dup 0)
2986         (float:<FL_CONV:MODE> (match_dup 4)))]
2987 {
       /* Operand 4 is the scratch's hard register viewed as DImode.  */
2988   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
2989 })
2990
2991 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
     ;; Operand 1 (the vector being modified) is tied to the destination,
     ;; operand 2 is the new scalar and operand 3 the constant element number.
     ;; When the instruction is output, operand 3 is replaced by the byte offset
     ;; of the element, and xxinsertw (V4SI) or vinsert<wd> (other modes) is
     ;; emitted.
2992 (define_insn "vsx_set_<mode>_p9"
2993   [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
2994         (unspec:VSX_EXTRACT_I
2995          [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
2996           (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
2997           (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
2998          UNSPEC_VSX_SET))]
2999   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
3000    && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
3001 {
3002   int ele = INTVAL (operands[3]);
3003   int nunits = GET_MODE_NUNITS (<MODE>mode);
3004
       /* Mirror the element number when element order is not big endian.  */
3005   if (!VECTOR_ELT_ORDER_BIG)
3006     ele = nunits - 1 - ele;
3007
       /* Scale the element number to a byte offset.  */
3008   operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3009   if (<MODE>mode == V4SImode)
3010     return "xxinsertw %x0,%x2,%3";
3011   else
3012     return "vinsert<wd> %0,%2,%3";
3013 }
3014   [(set_attr "type" "vecperm")])
3015
3016 ;; Expanders for builtins
     ;; vsx_mergel_<mode> builds a vec_select of the concatenation of
     ;; operands 1 and 2.  Normally it selects elements 1 and 3 of
     ;; (op1, op2); on little endian with big-endian element order it instead
     ;; selects elements 0 and 2 of (op2, op1).
3017 (define_expand "vsx_mergel_<mode>"
3018   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3019    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3020    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3021   "VECTOR_MEM_VSX_P (<MODE>mode)"
3022 {
3023   rtvec v;
3024   rtx x;
3025
3026   /* Special handling for LE with -maltivec=be.  */
3027   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
3028     {
3029       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
3030       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
3031     }
3032   else
3033     {
3034       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
3035       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
3036     }
3037
3038   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
3039   emit_insn (gen_rtx_SET (operands[0], x));
3040   DONE;
3041 })
3042
     ;; vsx_mergeh_<mode> builds a vec_select of the concatenation of
     ;; operands 1 and 2.  Normally it selects elements 0 and 2 of
     ;; (op1, op2); on little endian with big-endian element order it instead
     ;; selects elements 1 and 3 of (op2, op1).
3043 (define_expand "vsx_mergeh_<mode>"
3044   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3045    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3046    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3047   "VECTOR_MEM_VSX_P (<MODE>mode)"
3048 {
3049   rtvec v;
3050   rtx x;
3051
3052   /* Special handling for LE with -maltivec=be.  */
3053   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
3054     {
3055       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
3056       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
3057     }
3058   else
3059     {
3060       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
3061       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
3062     }
3063
3064   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
3065   emit_insn (gen_rtx_SET (operands[0], x));
3066   DONE;
3067 })
3068
3069 ;; V2DF/V2DI splat
     ;; Duplicates a 64-bit scalar into both elements.  The three alternatives
     ;; take the scalar from a vector/floating register (xxpermdi), from memory
     ;; (lxvdsx) or from a base register (mtvsrdd; the destination constraint
     ;; for that alternative is "we").
3070 (define_insn "vsx_splat_<mode>"
3071   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>,<VSa>,we")
3072         (vec_duplicate:VSX_D
3073          (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,Z,b")))]
3074   "VECTOR_MEM_VSX_P (<MODE>mode)"
3075   "@
3076    xxpermdi %x0,%x1,%x1,0
3077    lxvdsx %x0,%y1
3078    mtvsrdd %x0,%1,%1"
3079   [(set_attr "type" "vecperm,vecload,vecperm")])
3080
3081 ;; V4SI splat support: duplicate an SImode value into all four words.
     ;; Enabled by TARGET_P9_VECTOR.  Alternative 0 prints mtvsrws for the
     ;; "r" operand; alternative 1 prints lxvwsx for the "Z" operand and is
     ;; typed as a vector load.
3082 (define_insn "vsx_splat_v4si"
3083   [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
3084         (vec_duplicate:V4SI
3085          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
3086   "TARGET_P9_VECTOR"
3087   "@
3088    mtvsrws %x0,%1
3089    lxvwsx %x0,%y1"
3090   [(set_attr "type" "vecperm,vecload")])
3091
3092 ;; SImode is not currently allowed in vector registers.  This pattern
3093 ;; allows us to use direct move to get the value in a vector register
3094 ;; so that we can use XXSPLTW.
     ;; The source is a DImode register truncated to SImode: the "wj"
     ;; alternative prints xxspltw with word index 1, the "r" alternative
     ;; prints mtvsrws.
3095 (define_insn "vsx_splat_v4si_di"
3096   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
3097         (vec_duplicate:V4SI
3098          (truncate:SI
3099           (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
3100   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3101   "@
3102    xxspltw %x0,%x1,1
3103    mtvsrws %x0,%1"
3104   [(set_attr "type" "vecperm")])
3105
3106 ;; V4SF splat (ISA 3.0)
     ;; Alternative 0 ("Z" memory) prints lxvwsx and alternative 2 ("r")
     ;; prints mtvsrws.  Alternative 1 ("wy") prints "#" and is split once
     ;; reload has completed and operand 1 satisfies vsx_register_operand:
     ;; the split first sets operand 0 from UNSPEC_VSX_CVDPSPN of operand 1,
     ;; then from UNSPEC_VSX_XXSPLTW of operand 0 with selector 0.  That
     ;; alternative is the only one with length 8.
3107 (define_insn_and_split "vsx_splat_v4sf"
3108   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
3109         (vec_duplicate:V4SF
3110          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
3111   "TARGET_P9_VECTOR"
3112   "@
3113    lxvwsx %x0,%y1
3114    #
3115    mtvsrws %x0,%1"
3116   "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
3117   [(set (match_dup 0)
3118         (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
3119    (set (match_dup 0)
3120         (unspec:V4SF [(match_dup 0)
3121                       (const_int 0)] UNSPEC_VSX_XXSPLTW))]
3122   ""
3123   [(set_attr "type" "vecload,vecperm,mftgpr")
3124    (set_attr "length" "4,8,4")])
3125
3126 ;; V4SF/V4SI splat from a vector element, expressed as a vec_duplicate
     ;; of a vec_select so the element index in operand 2 is visible in RTL.
3127 (define_insn "vsx_xxspltw_<mode>"
3128   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3129         (vec_duplicate:VSX_W
3130          (vec_select:<VS_scalar>
3131           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3132           (parallel
3133            [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
3134   "VECTOR_MEM_VSX_P (<MODE>mode)"
3135 {
3136   /* The element index is mirrored (N becomes 3 - N) on little endian.  */
3137   rtx elt = operands[2];
3138   operands[2] = BYTES_BIG_ENDIAN ? elt : GEN_INT (3 - INTVAL (elt));
3139   return "xxspltw %x0,%x1,%2";
3140 }
3141   [(set_attr "type" "vecperm")])
3142
3143 (define_insn "vsx_xxspltw_<mode>_direct"
     ;; UNSPEC form of the word splat: operand 2 is printed into the
     ;; xxspltw immediate unchanged, with no endian adjustment made by
     ;; this pattern.
3144   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3145         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3146                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
3147                       UNSPEC_VSX_XXSPLTW))]
3148   "VECTOR_MEM_VSX_P (<MODE>mode)"
3149   "xxspltw %x0,%x1,%2"
3150   [(set_attr "type" "vecperm")])
3151
3152 ;; V16QI/V8HI splat support on ISA 2.07
     ;; The source is a DImode value in an Altivec register, truncated to
     ;; the element mode.  The instruction suffix and the element number
     ;; printed as the last operand both come from mode attributes
     ;; (VSX_SPLAT_SUFFIX and VSX_SPLAT_COUNT).
3153 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
3154   [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
3155         (vec_duplicate:VSX_SPLAT_I
3156          (truncate:<VS_scalar>
3157           (match_operand:DI 1 "altivec_register_operand" "v"))))]
3158   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3159   "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
3160   [(set_attr "type" "vecperm")])
3161
3162 ;; V2DF/V2DI splat for use by vec_splat builtin.  Operand 2 is the
     ;; element index; it only chooses which xxpermdi selector is printed.
3163 (define_insn "vsx_xxspltd_<mode>"
3164   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3165         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
3166                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
3167                       UNSPEC_VSX_XXSPLTD))]
3168   "VECTOR_MEM_VSX_P (<MODE>mode)"
3169 {
3170   /* Selector 0 goes with index 0 under big-endian element order and
3171      with index 1 otherwise; any other index gets selector 3.  */
3172   HOST_WIDE_INT first = (VECTOR_ELT_ORDER_BIG) ? 0 : 1;
3173   return (INTVAL (operands[2]) == first
3174           ? "xxpermdi %x0,%x1,%x1,0" : "xxpermdi %x0,%x1,%x1,3");
3175 }
3176   [(set_attr "type" "vecperm")])
3177
3178 ;; V4SF/V4SI interleave: elements 0, 4, 1, 5 of the concatenation of
     ;; operands 1 and 2.
3179 (define_insn "vsx_xxmrghw_<mode>"
3180   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3181         (vec_select:VSX_W
3182           (vec_concat:<VS_double>
3183             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3184             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
3185           (parallel [(const_int 0) (const_int 4)
3186                      (const_int 1) (const_int 5)])))]
3187   "VECTOR_MEM_VSX_P (<MODE>mode)"
3188 {
3189   /* Little endian prints the opposite merge with its inputs swapped.  */
3190   return (BYTES_BIG_ENDIAN
3191           ? "xxmrghw %x0,%x1,%x2"
3192           : "xxmrglw %x0,%x2,%x1");
3193 }
3194   [(set_attr "type" "vecperm")])
3195
3196 (define_insn "vsx_xxmrglw_<mode>"
     ;; Interleave of elements 2, 6, 3, 7 of the concatenation of
     ;; operands 1 and 2.
3197   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3198         (vec_select:VSX_W
3199           (vec_concat:<VS_double>
3200             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3201             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
3202           (parallel [(const_int 2) (const_int 6)
3203                      (const_int 3) (const_int 7)])))]
3204   "VECTOR_MEM_VSX_P (<MODE>mode)"
3205 {
3206   /* Little endian prints the opposite merge with its inputs swapped.  */
3207   return (BYTES_BIG_ENDIAN
3208           ? "xxmrglw %x0,%x1,%x2"
3209           : "xxmrghw %x0,%x2,%x1");
3210 }
3211   [(set_attr "type" "vecperm")])
3212
3213 ;; Shift left double by word immediate
     ;; Operands 1 and 2 are the two source vectors and operand 3 is the
     ;; constant printed as the xxsldwi immediate.  The operation is opaque
     ;; to RTL (UNSPEC_VSX_SLDWI).
3214 (define_insn "vsx_xxsldwi_<mode>"
3215   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
3216         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
3217                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
3218                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
3219                       UNSPEC_VSX_SLDWI))]
3220   "VECTOR_MEM_VSX_P (<MODE>mode)"
3221   "xxsldwi %x0,%x1,%x2,%3"
3222   [(set_attr "type" "vecperm")])
3223
3224 \f
3225 ;; Vector reduction insns and splitters
3226
3227 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
     ;; V2DF reduction: the VEC_reduc operation applied to operand 1 and to
     ;; operand 1 with its two elements exchanged.  Always printed as "#"
     ;; and split into an xxsldwi by two words followed by the vector
     ;; operation.  The scratch is tied to operand 0 in the first two
     ;; alternatives and is a separate early-clobber register in the others.
3228   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
3229         (VEC_reduc:V2DF
3230          (vec_concat:V2DF
3231           (vec_select:DF
3232            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3233            (parallel [(const_int 1)]))
3234           (vec_select:DF
3235            (match_dup 1)
3236            (parallel [(const_int 0)])))
3237          (match_dup 1)))
3238    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
3239   "VECTOR_UNIT_VSX_P (V2DFmode)"
3240   "#"
3241   ""
3242   [(const_int 0)]
3243   "
3244 {
3245   rtx shifted = operands[2];
3246   if (GET_CODE (shifted) == SCRATCH)
3247     shifted = gen_reg_rtx (V2DFmode);
3248   emit_insn (gen_vsx_xxsldwi_v2df (shifted, operands[1], operands[1], const2_rtx));
3249   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], shifted, operands[1]));
3250   DONE;
3251 }"
3252   [(set_attr "length" "8")
3253    (set_attr "type" "veccomplex")])
3254
3255 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
     ;; V4SF reduction, kept opaque through UNSPEC_REDUC until it is split
     ;; into two xxsldwi/operation pairs (four instructions, length 16).
3256   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
3257         (VEC_reduc:V4SF
3258          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3259          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
3260    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3261    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
3262   "VECTOR_UNIT_VSX_P (V4SFmode)"
3263   "#"
3264   ""
3265   [(const_int 0)]
3266   "
3267 {
3268   /* While pseudos can still be created every intermediate gets its
3269      own register; otherwise the two scratch operands are used and
3270      operand 2 also receives the second xxsldwi result.  */
3271   bool fresh_p = can_create_pseudo_p ();
3272   rtx dest = operands[0];
3273   rtx src = operands[1];
3274   rtx shift_a = fresh_p ? gen_reg_rtx (V4SFmode) : operands[2];
3275   rtx partial = fresh_p ? gen_reg_rtx (V4SFmode) : operands[3];
3276   rtx shift_b = fresh_p ? gen_reg_rtx (V4SFmode) : shift_a;
3277
3278   /* First step: xxsldwi of the input with itself by two words,
3279      combined with the input.  */
3280   emit_insn (gen_vsx_xxsldwi_v4sf (shift_a, src, src, const2_rtx));
3281   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (partial, shift_a, src));
3282
3283   /* Second step: xxsldwi of the partial result with itself by
3284      three words, combined with the partial result to give the
3285      final value in operand 0.  */
3286   emit_insn (gen_vsx_xxsldwi_v4sf (shift_b, partial, partial, GEN_INT (3)));
3287   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (dest, shift_b, partial));
3288
3289   DONE;
3290 }"
3291   [(set_attr "length" "16")
3292    (set_attr "type" "veccomplex")])
3293
3294 ;; Combiner patterns with the vector reduction patterns that know we can get
3295 ;; to the top element of the V2DF array without doing an extract.
3296
3297 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
     ;; Element 1 of a V2DF reduction, delivered directly as a DFmode
     ;; value.  The split extracts element 1 of operand 1 into the scratch
     ;; and combines it with the gen_highpart view of operand 1 using the
     ;; scalar DFmode operation.
3298   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
3299         (vec_select:DF
3300          (VEC_reduc:V2DF
3301           (vec_concat:V2DF
3302            (vec_select:DF
3303             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3304             (parallel [(const_int 1)]))
3305            (vec_select:DF
3306             (match_dup 1)
3307             (parallel [(const_int 0)])))
3308           (match_dup 1))
3309          (parallel [(const_int 1)])))
3310    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
3311   "VECTOR_UNIT_VSX_P (V2DFmode)"
3312   "#"
3313   ""
3314   [(const_int 0)]
3315   "
3316 {
3317   rtx vec = operands[1];
3318   rtx upper = gen_highpart (DFmode, vec);
3319   rtx lower = operands[2];
3320   if (GET_CODE (lower) == SCRATCH)
3321     lower = gen_reg_rtx (DFmode);
3322   emit_insn (gen_vsx_extract_v2df (lower, vec, const1_rtx));
3323   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], upper, lower));
3324   DONE;
3325 }"
3326   [(set_attr "length" "8")
3327    (set_attr "type" "veccomplex")])
3328
3329 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
     ;; Element 3 of a V4SF reduction, delivered directly as an SFmode
     ;; value.  Same two xxsldwi/operation steps as the vector form, plus a
     ;; final gen_vsx_xscvspdp_scalar2 into operand 0 (five instructions,
     ;; length 20).  Scratch operand 4 is tied to operand 0.
3330   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
3331         (vec_select:SF
3332          (VEC_reduc:V4SF
3333           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3334           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
3335          (parallel [(const_int 3)])))
3336    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3337    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
3338    (clobber (match_scratch:V4SF 4 "=0,0"))]
3339   "VECTOR_UNIT_VSX_P (V4SFmode)"
3340   "#"
3341   ""
3342   [(const_int 0)]
3343   "
3344 {
3345   /* While pseudos can still be created every intermediate gets its
3346      own register.  Otherwise the scratch operands are used, with
3347      operand 2 holding both xxsldwi results and operand 4 holding
3348      the vector result that is converted into operand 0.  */
3349   bool fresh_p = can_create_pseudo_p ();
3350   rtx dest = operands[0];
3351   rtx src = operands[1];
3352   rtx shift_a = fresh_p ? gen_reg_rtx (V4SFmode) : operands[2];
3353   rtx partial = fresh_p ? gen_reg_rtx (V4SFmode) : operands[3];
3354   rtx shift_b = fresh_p ? gen_reg_rtx (V4SFmode) : shift_a;
3355   rtx total = fresh_p ? gen_reg_rtx (V4SFmode) : operands[4];
3356
3357   /* First step: xxsldwi by two words, combined with the input.  */
3358   emit_insn (gen_vsx_xxsldwi_v4sf (shift_a, src, src, const2_rtx));
3359   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (partial, shift_a, src));
3360
3361   /* Second step: xxsldwi of the partial result by three words,
3362      combined with the partial result.  */
3363   emit_insn (gen_vsx_xxsldwi_v4sf (shift_b, partial, partial, GEN_INT (3)));
3364   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (total, shift_b, partial));
3365
3366   /* Hand the result to gen_vsx_xscvspdp_scalar2 to fill the SFmode
3367      destination.  */
3368   emit_insn (gen_vsx_xscvspdp_scalar2 (dest, total));
3369   DONE;
3370 }"
3371   [(set_attr "length" "20")
3372    (set_attr "type" "veccomplex")])
3373
3374 \f
3375 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
     ;; Matches a short constant loaded into a base register followed by a
     ;; VSX load whose address is that register plus another integer
     ;; register, and prints the pair as one li + indexed-load sequence
     ;; (length 8).  Not used when TARGET_P9_VECTOR is set.
3376 (define_peephole
3377   [(set (match_operand:P 0 "base_reg_operand" "")
3378         (match_operand:P 1 "short_cint_operand" ""))
3379    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
3380         (mem:VSX_M (plus:P (match_dup 0)
3381                            (match_operand:P 3 "int_reg_operand" ""))))]
3382   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
3383   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
3384   [(set_attr "length" "8")
3385    (set_attr "type" "vecload")])
3386
3387 (define_peephole
     ;; Same fusion as the preceding peephole, for an address whose PLUS
     ;; has the freshly loaded base register as its second operand.  The
     ;; printed sequence is identical.
3388   [(set (match_operand:P 0 "base_reg_operand" "")
3389         (match_operand:P 1 "short_cint_operand" ""))
3390    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
3391         (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
3392                            (match_dup 0))))]
3393   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
3394   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
3395   [(set_attr "length" "8")
3396    (set_attr "type" "vecload")])
3397
3398 \f
3399 ;; ISA 3.0 vector extend sign support
3400
3401 (define_insn "vsx_sign_extend_qi_<mode>"
     ;; Sign extension from a V16QI source to a VSINT_84 destination, kept
     ;; opaque as UNSPEC_VSX_SIGN_EXTEND.  Prints vextsb2 followed by the
     ;; wd attribute of the destination mode.
3402   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
3403         (unspec:VSINT_84
3404          [(match_operand:V16QI 1 "vsx_register_operand" "v")]
3405          UNSPEC_VSX_SIGN_EXTEND))]
3406   "TARGET_P9_VECTOR"
3407   "vextsb2<wd> %0,%1"
3408   [(set_attr "type" "vecexts")])
3409
3410 (define_insn "vsx_sign_extend_hi_<mode>"
     ;; Sign extension from a V8HI source to a VSINT_84 destination, kept
     ;; opaque as UNSPEC_VSX_SIGN_EXTEND.  Prints vextsh2 followed by the
     ;; wd attribute of the destination mode.
3411   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
3412         (unspec:VSINT_84
3413          [(match_operand:V8HI 1 "vsx_register_operand" "v")]
3414          UNSPEC_VSX_SIGN_EXTEND))]
3415   "TARGET_P9_VECTOR"
3416   "vextsh2<wd> %0,%1"
3417   [(set_attr "type" "vecexts")])
3418
3419 (define_insn "*vsx_sign_extend_si_v2di"
     ;; Sign extension from a V4SI source to a V2DI destination (vextsw2d),
     ;; kept opaque as UNSPEC_VSX_SIGN_EXTEND.
3420   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
3421         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
3422                      UNSPEC_VSX_SIGN_EXTEND))]
3423   "TARGET_P9_VECTOR"
3424   "vextsw2d %0,%1"
3425   [(set_attr "type" "vecexts")])
3426
3427 \f
3428 ;; ISA 3.0 Binary Floating-Point Support
3429
3430 ;; VSX Scalar Extract Exponent Double-Precision
     ;; DFmode input in a VSX register, DImode result in an "r" register.
     ;; Requires TARGET_P9_VECTOR and TARGET_64BIT.
3431 (define_insn "xsxexpdp"
3432   [(set (match_operand:DI 0 "register_operand" "=r")
3433         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
3434          UNSPEC_VSX_SXEXPDP))]
3435   "TARGET_P9_VECTOR && TARGET_64BIT"
3436   "xsxexpdp %0,%x1"
3437   [(set_attr "type" "integer")])
3438
3439 ;; VSX Scalar Extract Significand Double-Precision
     ;; DFmode input in a VSX register, DImode result in an "r" register.
     ;; Requires TARGET_P9_VECTOR and TARGET_64BIT.
3440 (define_insn "xsxsigdp"
3441   [(set (match_operand:DI 0 "register_operand" "=r")
3442         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
3443          UNSPEC_VSX_SXSIGDP))]
3444   "TARGET_P9_VECTOR && TARGET_64BIT"
3445   "xsxsigdp %0,%x1"
3446   [(set_attr "type" "integer")])
3447
3448 ;; VSX Scalar Insert Exponent Double-Precision
     ;; Both inputs are DImode values in "r" registers; the DFmode result
     ;; goes to a VSX register.  Requires TARGET_P9_VECTOR and TARGET_64BIT.
3449 (define_insn "xsiexpdp"
3450   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
3451         (unspec:DF [(match_operand:DI 1 "register_operand" "r")
3452                     (match_operand:DI 2 "register_operand" "r")]
3453          UNSPEC_VSX_SIEXPDP))]
3454   "TARGET_P9_VECTOR && TARGET_64BIT"
3455   "xsiexpdp %x0,%1,%2"
3456   [(set_attr "type" "fpsimple")])
3457
3458 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
     ;; Same instruction and unspec as xsiexpdp, but operand 1 is declared
     ;; DFmode while still being constrained to an "r" register.
3459 (define_insn "xsiexpdpf"
3460   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
3461         (unspec:DF [(match_operand:DF 1 "register_operand" "r")
3462                     (match_operand:DI 2 "register_operand" "r")]
3463          UNSPEC_VSX_SIEXPDP))]
3464   "TARGET_P9_VECTOR && TARGET_64BIT"
3465   "xsiexpdp %x0,%1,%2"
3466   [(set_attr "type" "fpsimple")])
3467
3468 ;; VSX Scalar Compare Exponents Double-Precision
     ;; One expander per CMP_TEST code.  It emits two sets: a new CCFPmode
     ;; pseudo (operand 3) receives the comparison of the
     ;; UNSPEC_VSX_SCMPEXPDP of operands 1 and 2 against zero, and operand 0
     ;; receives the CMP_TEST code applied to that pseudo and zero.
3469 (define_expand "xscmpexpdp_<code>"
3470   [(set (match_dup 3)
3471         (compare:CCFP
3472          (unspec:DF
3473           [(match_operand:DF 1 "vsx_register_operand" "wa")
3474            (match_operand:DF 2 "vsx_register_operand" "wa")]
3475           UNSPEC_VSX_SCMPEXPDP)
3476          (const_int 0)))
3477    (set (match_operand:SI 0 "register_operand" "=r")
3478         (CMP_TEST:SI (match_dup 3)
3479                      (const_int 0)))]
3480   "TARGET_P9_VECTOR"
3481 {
3482   operands[3] = gen_reg_rtx (CCFPmode);
3483 })
3484
3485 (define_insn "*xscmpexpdp"
     ;; Insn for the compare emitted by the xscmpexpdp expanders: sets a
     ;; CCFPmode condition register ("y") from the UNSPEC_VSX_SCMPEXPDP of
     ;; operands 1 and 2.  Operand 3 only matches a zero constant and is
     ;; not printed.
3486   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
3487         (compare:CCFP
3488          (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
3489                      (match_operand:DF 2 "vsx_register_operand" "wa")]
3490           UNSPEC_VSX_SCMPEXPDP)
3491          (match_operand:SI 3 "zero_constant" "j")))]
3492   "TARGET_P9_VECTOR"
3493   "xscmpexpdp %0,%x1,%x2"
3494   [(set_attr "type" "fpcompare")])
3495
3496 ;; VSX Scalar Test Data Class Double- and Single-Precision
3497 ;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
3498 ;;   if any of the conditions tested by operand 2 are satisfied.
3499 ;;   The gt and unordered bits are cleared to zero.)
     ;; The expander allocates a CCFPmode pseudo (operand 3) for the test
     ;; result, compares against an SImode zero (operand 4), and then sets
     ;; operand 0 from the EQ test of that pseudo.
3500 (define_expand "xststdc<Fvsx>"
3501   [(set (match_dup 3)
3502         (compare:CCFP
3503          (unspec:SFDF
3504           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3505            (match_operand:SI 2 "u7bit_cint_operand" "n")]
3506           UNSPEC_VSX_STSTDC)
3507          (match_dup 4)))
3508    (set (match_operand:SI 0 "register_operand" "=r")
3509         (eq:SI (match_dup 3)
3510                (const_int 0)))]
3511   "TARGET_P9_VECTOR"
3512 {
3513   operands[3] = gen_reg_rtx (CCFPmode);
3514   operands[4] = CONST0_RTX (SImode);
3515 })
3516
3517 ;; The VSX Scalar Test Data Class Double- and Single-Precision
3518 ;; instruction may also be used to test for negative value.
     ;; The condition mask is fixed at zero.  The expander allocates a
     ;; CCFPmode pseudo (operand 2), compares against an SImode zero
     ;; (operand 3), and sets operand 0 from the LT test of that pseudo.
3519 (define_expand "xststdcneg<Fvsx>"
3520   [(set (match_dup 2)
3521         (compare:CCFP
3522          (unspec:SFDF
3523           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3524            (const_int 0)]
3525           UNSPEC_VSX_STSTDC)
3526          (match_dup 3)))
3527    (set (match_operand:SI 0 "register_operand" "=r")
3528         (lt:SI (match_dup 2)
3529                (const_int 0)))]
3530   "TARGET_P9_VECTOR"
3531 {
3532   operands[2] = gen_reg_rtx (CCFPmode);
3533   operands[3] = CONST0_RTX (SImode);
3534 })
3535
3536 (define_insn "*xststdc<Fvsx>"
     ;; Insn for the compare emitted by the xststdc expanders: operand 1 is
     ;; the value tested, operand 2 the 7-bit condition mask printed as the
     ;; immediate, operand 3 a zero constant that is not printed.
     ;; NOTE(review): operand 0 has an empty predicate here, while
     ;; *xscmpexpdp uses cc_reg_operand for the same role -- confirm this is
     ;; intentional.
3537   [(set (match_operand:CCFP 0 "" "=y")
3538         (compare:CCFP
3539          (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3540                        (match_operand:SI 2 "u7bit_cint_operand" "n")]
3541           UNSPEC_VSX_STSTDC)
3542          (match_operand:SI 3 "zero_constant" "j")))]
3543   "TARGET_P9_VECTOR"
3544   "xststdc<Fvsx> %0,%x1,%2"
3545   [(set_attr "type" "fpcompare")])
3546
3547 ;; VSX Vector Extract Exponent Double and Single Precision
     ;; Source and result have the same VSX_F vector mode; the operation is
     ;; opaque to RTL (UNSPEC_VSX_VXEXP).
3548 (define_insn "xvxexp<VSs>"
3549   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3550         (unspec:VSX_F
3551          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
3552          UNSPEC_VSX_VXEXP))]
3553   "TARGET_P9_VECTOR"
3554   "xvxexp<VSs> %x0,%x1"
3555   [(set_attr "type" "vecsimple")])
3556
3557 ;; VSX Vector Extract Significand Double and Single Precision
     ;; Source and result have the same VSX_F vector mode; the operation is
     ;; opaque to RTL (UNSPEC_VSX_VXSIG).
3558 (define_insn "xvxsig<VSs>"
3559   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3560         (unspec:VSX_F
3561          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
3562          UNSPEC_VSX_VXSIG))]
3563   "TARGET_P9_VECTOR"
3564   "xvxsig<VSs> %x0,%x1"
3565   [(set_attr "type" "vecsimple")])
3566
3567 ;; VSX Vector Insert Exponent Double and Single Precision
     ;; Both sources and the result have the same VSX_F vector mode; the
     ;; operation is opaque to RTL (UNSPEC_VSX_VIEXP).
3568 (define_insn "xviexp<VSs>"
3569   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3570         (unspec:VSX_F
3571          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
3572           (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
3573          UNSPEC_VSX_VIEXP))]
3574   "TARGET_P9_VECTOR"
3575   "xviexp<VSs> %x0,%x1,%x2"
3576   [(set_attr "type" "vecsimple")])
3577
3578 ;; VSX Vector Test Data Class Double and Single Precision
3579 ;; The corresponding elements of the result vector are all ones
3580 ;; if any of the conditions tested by operand 2 are satisfied.
     ;; Operand 1 is the VSX_F vector tested; the result uses the VSI mode
     ;; attribute of that mode.
3581 (define_insn "xvtstdc<VSs>"
3582   [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
3583         (unspec:<VSI>
3584          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
3585           (match_operand:SI 2 "u7bit_cint_operand" "n")]
3586          UNSPEC_VSX_VTSTDC))]
3587   "TARGET_P9_VECTOR"
3588   "xvtstdc<VSs> %x0,%x1,%2"
3589   [(set_attr "type" "vecsimple")])
3590
3591 ;; ISA 3.0 String Operations Support
3592
3593 ;; Compare vectors producing a vector result and a predicate, setting CR6
3594 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
3595 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
3596 ;; need to match v4sf, v2df, or v2di modes because those are expanded
3597 ;; to use Power8 instructions.
     ;; The two sets are performed in parallel: CR6 receives the
     ;; UNSPEC_PREDICATE of the NE comparison and operand 0 receives the
     ;; element-wise NE result.  The record form (trailing ".") of vcmpne
     ;; is printed.
3598 (define_insn "*vsx_ne_<mode>_p"
3599   [(set (reg:CC CR6_REGNO)
3600         (unspec:CC
3601          [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3602                  (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
3603          UNSPEC_PREDICATE))
3604    (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
3605         (ne:VSX_EXTRACT_I (match_dup 1)
3606                           (match_dup 2)))]
3607   "TARGET_P9_VECTOR"
3608   "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
3609   [(set_attr "type" "vecsimple")])
3610
3611 (define_insn "*vector_nez_<mode>_p"
     ;; Predicate form of the UNSPEC_NEZ_P comparison: CR6 receives the
     ;; UNSPEC_PREDICATE of the comparison and operand 0 receives the
     ;; vector result, both in parallel.  The record form (trailing ".")
     ;; of vcmpnez is printed.
3612   [(set (reg:CC CR6_REGNO)
3613         (unspec:CC [(unspec:VI
3614                      [(match_operand:VI 1 "gpc_reg_operand" "v")
3615                       (match_operand:VI 2 "gpc_reg_operand" "v")]
3616                      UNSPEC_NEZ_P)]
3617          UNSPEC_PREDICATE))
3618    (set (match_operand:VI 0 "gpc_reg_operand" "=v")
3619         (unspec:VI [(match_dup 1)
3620                     (match_dup 2)]
3621          UNSPEC_NEZ_P))]
3622   "TARGET_P9_VECTOR"
3623   "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
3624   [(set_attr "type" "vecsimple")])
3625
3626 ;; Load VSX Vector with Length
     ;; Operand 1 is the address and operand 2 the length.  The length is
     ;; first copied into a new DImode pseudo (operand 3), and the load
     ;; uses that copy -- presumably so that the in-place shift done by the
     ;; matching insn does not modify the caller's register.
3627 (define_expand "lxvl"
3628   [(set (match_dup 3)
3629         (match_operand:DI 2 "register_operand"))
3630    (set (match_operand:V16QI 0 "vsx_register_operand")
3631         (unspec:V16QI
3632          [(match_operand:DI 1 "gpc_reg_operand")
3633           (match_dup 3)]
3634          UNSPEC_LXVL))]
3635   "TARGET_P9_VECTOR && TARGET_64BIT"
3636 {
3637   operands[3] = gen_reg_rtx (DImode);
3638 })
3639
3640 (define_insn "*lxvl"
     ;; Matches the load emitted by the lxvl expander.  Operand 2 is
     ;; shifted left by 56 in place (hence the "+r" constraint) and then
     ;; used as the length register of lxvl.  Two instructions are printed,
     ;; so the length is 8.
3641   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3642         (unspec:V16QI
3643          [(match_operand:DI 1 "gpc_reg_operand" "b")
3644           (match_operand:DI 2 "register_operand" "+r")]
3645          UNSPEC_LXVL))]
3646   "TARGET_P9_VECTOR && TARGET_64BIT"
3647   "sldi %2,%2, 56\; lxvl %x0,%1,%2"
3648   [(set_attr "length" "8")
3649    (set_attr "type" "vecload")])
3650
3651 ;; Store VSX Vector with Length
     ;; Operand 0 is the V16QI data, operand 1 the address and operand 2
     ;; the length.  The length is first copied into a new DImode pseudo
     ;; (operand 3), and the store uses that copy -- presumably so that the
     ;; in-place shift done by the matching insn does not modify the
     ;; caller's register.
3652 (define_expand "stxvl"
3653   [(set (match_dup 3)
3654         (match_operand:DI 2 "register_operand"))
3655    (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
3656         (unspec:V16QI
3657          [(match_operand:V16QI 0 "vsx_register_operand")
3658           (match_dup 3)]
3659          UNSPEC_STXVL))]
3660   "TARGET_P9_VECTOR && TARGET_64BIT"
3661 {
3662   operands[3] = gen_reg_rtx (DImode);
3663 })
3664
3665 (define_insn "*stxvl"
     ;; Matches the store emitted by the stxvl expander.  Operand 1 ("b")
     ;; is the address, operand 0 the V16QI data and operand 2 the length.
     ;; Operand 2 is shifted left by 56 in place (hence the "+r"
     ;; constraint) before it is used as the length register of stxvl, the
     ;; same adjustment the *lxvl pattern makes.  sldi takes three
     ;; operands, so the shift count must be spelled out.  Two instructions
     ;; are printed, so the length is 8.
3666   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
3667         (unspec:V16QI
3668          [(match_operand:V16QI 0 "vsx_register_operand" "wa")
3669           (match_operand:DI 2 "register_operand" "+r")]
3670          UNSPEC_STXVL))]
3671   "TARGET_P9_VECTOR && TARGET_64BIT"
3672   "sldi %2,%2,56\;stxvl %x0,%1,%2"
3673   [(set_attr "length" "8")
3674    (set_attr "type" "vecstore")])
3675
3676 ;; Vector Compare Not Equal Byte
     ;; V16QI operands and result, all in Altivec registers; opaque to RTL
     ;; (UNSPEC_VCMPNEB).
3677 (define_insn "vcmpneb"
3678   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
3679         (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
3680                        (match_operand:V16QI 2 "altivec_register_operand" "v")]
3681          UNSPEC_VCMPNEB))]
3682   "TARGET_P9_VECTOR"
3683   "vcmpneb %0,%1,%2"
3684   [(set_attr "type" "vecsimple")])
3685
3686 ;; Vector Compare Not Equal or Zero Byte
     ;; V16QI operands and result, all in Altivec registers; opaque to RTL
     ;; (UNSPEC_VCMPNEZB).
3687 (define_insn "vcmpnezb"
3688   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
3689         (unspec:V16QI
3690          [(match_operand:V16QI 1 "altivec_register_operand" "v")
3691           (match_operand:V16QI 2 "altivec_register_operand" "v")]
3692          UNSPEC_VCMPNEZB))]
3693   "TARGET_P9_VECTOR"
3694   "vcmpnezb %0,%1,%2"
3695   [(set_attr "type" "vecsimple")])
3696
3697 ;; Vector Compare Not Equal Half Word
     ;; V8HI operands and result, all in Altivec registers; opaque to RTL
     ;; (UNSPEC_VCMPNEH).
3698 (define_insn "vcmpneh"
3699   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
3700         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
3701                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
3702          UNSPEC_VCMPNEH))]
3703   "TARGET_P9_VECTOR"
3704   "vcmpneh %0,%1,%2"
3705   [(set_attr "type" "vecsimple")])
3706
3707 ;; Vector Compare Not Equal or Zero Half Word
     ;; V8HI operands and result, all in Altivec registers; opaque to RTL
     ;; (UNSPEC_VCMPNEZH).
3708 (define_insn "vcmpnezh"
3709   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
3710         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
3711                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
3712          UNSPEC_VCMPNEZH))]
3713   "TARGET_P9_VECTOR"
3714   "vcmpnezh %0,%1,%2"
3715   [(set_attr "type" "vecsimple")])
3716
3717 ;; Vector Compare Not Equal Word
     ;; V4SI operands and result, all in Altivec registers.
     ;; NOTE(review): this word pattern is tagged UNSPEC_VCMPNEH, the same
     ;; unspec the half-word vcmpneh pattern uses.  The V4SI mode keeps the
     ;; two patterns distinct, but it looks like a copy-and-paste slip --
     ;; confirm whether a word-specific unspec was intended.
3718 (define_insn "vcmpnew"
3719   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
3720         (unspec:V4SI
3721          [(match_operand:V4SI 1 "altivec_register_operand" "v")
3722           (match_operand:V4SI 2 "altivec_register_operand" "v")]
3723          UNSPEC_VCMPNEH))]
3724   "TARGET_P9_VECTOR"
3725   "vcmpnew %0,%1,%2"
3726   [(set_attr "type" "vecsimple")])
3727
3728 ;; Vector Compare Not Equal or Zero Word
     ;; V4SI operands and result, all in Altivec registers; opaque to RTL
     ;; (UNSPEC_VCMPNEZW).
3729 (define_insn "vcmpnezw"
3730   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
3731         (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
3732                       (match_operand:V4SI 2 "altivec_register_operand" "v")]
3733          UNSPEC_VCMPNEZW))]
3734   "TARGET_P9_VECTOR"
3735   "vcmpnezw %0,%1,%2"
3736   [(set_attr "type" "vecsimple")])
3737
3738 ;; Vector Count Leading Zero Least-Significant Bits Byte
     ;; V16QI source in an Altivec register; SImode result in an "r"
     ;; register.
3739 (define_insn "vclzlsbb"
3740   [(set (match_operand:SI 0 "register_operand" "=r")
3741         (unspec:SI
3742          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
3743          UNSPEC_VCLZLSBB))]
3744   "TARGET_P9_VECTOR"
3745   "vclzlsbb %0,%1"
3746   [(set_attr "type" "vecsimple")])
3747
3748 ;; Vector Count Trailing Zero Least-Significant Bits Byte
     ;; V16QI source in an Altivec register; SImode result in an "r"
     ;; register.
3749 (define_insn "vctzlsbb"
3750   [(set (match_operand:SI 0 "register_operand" "=r")
3751         (unspec:SI
3752          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
3753          UNSPEC_VCTZLSBB))]
3754   "TARGET_P9_VECTOR"
3755   "vctzlsbb %0,%1"
3756   [(set_attr "type" "vecsimple")])
3757
3758 ;; Vector Extract Unsigned Byte Left-Indexed
     ;; Operand 1 is the SImode index in an "r" register and operand 2 the
     ;; V16QI source in an Altivec register; the SImode result goes to an
     ;; "r" register.
3759 (define_insn "vextublx"
3760   [(set (match_operand:SI 0 "register_operand" "=r")
3761         (unspec:SI
3762          [(match_operand:SI 1 "register_operand" "r")
3763           (match_operand:V16QI 2 "altivec_register_operand" "v")]
3764          UNSPEC_VEXTUBLX))]
3765   "TARGET_P9_VECTOR"
3766   "vextublx %0,%1,%2"
3767   [(set_attr "type" "vecsimple")])
3768
3769 ;; Vector Extract Unsigned Byte Right-Indexed
     ;; Operand 1 is the SImode index in an "r" register and operand 2 the
     ;; V16QI source in an Altivec register; the SImode result goes to an
     ;; "r" register.
3770 (define_insn "vextubrx"
3771   [(set (match_operand:SI 0 "register_operand" "=r")
3772         (unspec:SI
3773          [(match_operand:SI 1 "register_operand" "r")
3774           (match_operand:V16QI 2 "altivec_register_operand" "v")]
3775          UNSPEC_VEXTUBRX))]
3776   "TARGET_P9_VECTOR"
3777   "vextubrx %0,%1,%2"
3778   [(set_attr "type" "vecsimple")])
3779
3780 ;; Vector Extract Unsigned Half Word Left-Indexed
     ;; Operand 1 is the SImode index in an "r" register and operand 2 the
     ;; V8HI source in an Altivec register; the SImode result goes to an
     ;; "r" register.
3781 (define_insn "vextuhlx"
3782   [(set (match_operand:SI 0 "register_operand" "=r")
3783         (unspec:SI
3784          [(match_operand:SI 1 "register_operand" "r")
3785           (match_operand:V8HI 2 "altivec_register_operand" "v")]
3786          UNSPEC_VEXTUHLX))]
3787   "TARGET_P9_VECTOR"
3788   "vextuhlx %0,%1,%2"
3789   [(set_attr "type" "vecsimple")])
3790
3791 ;; Vector Extract Unsigned Half Word Right-Indexed
     ;; Operand 1 is the SImode index in an "r" register and operand 2 the
     ;; V8HI source in an Altivec register; the SImode result goes to an
     ;; "r" register.
3792 (define_insn "vextuhrx"
3793   [(set (match_operand:SI 0 "register_operand" "=r")
3794         (unspec:SI
3795          [(match_operand:SI 1 "register_operand" "r")
3796           (match_operand:V8HI 2 "altivec_register_operand" "v")]
3797          UNSPEC_VEXTUHRX))]
3798   "TARGET_P9_VECTOR"
3799   "vextuhrx %0,%1,%2"
3800   [(set_attr "type" "vecsimple")])
3801
3802 ;; Vector Extract Unsigned Word Left-Indexed
     ;; Operand 1 is the SImode index in an "r" register and operand 2 the
     ;; V4SI source in an Altivec register; the SImode result goes to an
     ;; "r" register.
3803 (define_insn "vextuwlx"
3804   [(set (match_operand:SI 0 "register_operand" "=r")
3805         (unspec:SI
3806          [(match_operand:SI 1 "register_operand" "r")
3807           (match_operand:V4SI 2 "altivec_register_operand" "v")]
3808          UNSPEC_VEXTUWLX))]
3809   "TARGET_P9_VECTOR"
3810   "vextuwlx %0,%1,%2"
3811   [(set_attr "type" "vecsimple")])
3812
3813 ;; Vector Extract Unsigned Word Right-Indexed
     ;; Operand 1 is the SImode index in an "r" register and operand 2 the
     ;; V4SI source in an Altivec register; the SImode result goes to an
     ;; "r" register.
3814 (define_insn "vextuwrx"
3815   [(set (match_operand:SI 0 "register_operand" "=r")
3816         (unspec:SI
3817          [(match_operand:SI 1 "register_operand" "r")
3818           (match_operand:V4SI 2 "altivec_register_operand" "v")]
3819          UNSPEC_VEXTUWRX))]
3820   "TARGET_P9_VECTOR"
3821   "vextuwrx %0,%1,%2"
3822   [(set_attr "type" "vecsimple")])
3823
3824 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
3825 ;; endian version needs to adjust the byte number, and the V4SI element in
3826 ;; vinsert4b.
3827 (define_expand "vextract4b"
     ;; Extract a word starting at the constant byte offset in operand 2
     ;; (0 to 12) of the V16QI operand 1 into the DImode operand 0.  When
     ;; the element order is not big endian the offset N is replaced by
     ;; 12 - N before the pattern is emitted.
3828   [(set (match_operand:DI 0 "gpc_reg_operand")
3829         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
3830                     (match_operand:QI 2 "const_0_to_12_operand")]
3831                    UNSPEC_XXEXTRACTUW))]
3832   "TARGET_P9_VECTOR"
3833 {
3834   if (!VECTOR_ELT_ORDER_BIG)
3835     operands[2] = GEN_INT (12 - INTVAL (operands[2]));
3836 })
3837
3838 (define_insn_and_split "*vextract4b_internal"
     ;; Alternative 0 ("wj" destination) prints xxextractuw directly.
     ;; Alternative 1 ("r" destination) prints "#" and is split once reload
     ;; has completed and operand 0 satisfies int_reg_operand.
3839   [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r")
3840         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v")
3841                     (match_operand:QI 2 "const_0_to_12_operand" "n,n")]
3842                    UNSPEC_XXEXTRACTUW))]
3843   "TARGET_P9_VECTOR"
3844   "@
3845    xxextractuw %x0,%x1,%2
3846    #"
3847   "&& reload_completed && int_reg_operand (operands[0], DImode)"
3848   [(const_int 0)]
3849 {
3850   /* GPR destination: load the byte offset into it, then extract
3851      through an SImode view with the left-indexed form for big-endian
3852      element order and the right-indexed form otherwise.  */
3853   rtx dest = operands[0];
3854   rtx dest_si = gen_rtx_REG (SImode, REGNO (dest));
3855   rtx src_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1]));
3856
3857   emit_move_insn (dest, operands[2]);
3858   emit_insn (VECTOR_ELT_ORDER_BIG
3859              ? gen_vextuwlx (dest_si, dest_si, src_v4si)
3860              : gen_vextuwrx (dest_si, dest_si, src_v4si));
3861   DONE;
3862 }
3863   [(set_attr "type" "vecperm")])
3864
3865 (define_expand "vinsert4b"
     ;; Insert a word taken from the V4SI operand 1 into the V16QI operand
     ;; 2 at the constant byte offset in operand 3 (0 to 12), giving
     ;; operand 0.  When the element order is not big endian, operand 1 is
     ;; first passed through gen_vsx_xxpermdi_v4si_be (with itself and
     ;; selector 1) into a new V4SI pseudo, and the offset N is replaced by
     ;; 12 - N.
3866   [(set (match_operand:V16QI 0 "vsx_register_operand")
3867         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
3868                        (match_operand:V16QI 2 "vsx_register_operand")
3869                        (match_operand:QI 3 "const_0_to_12_operand")]
3870                    UNSPEC_XXINSERTW))]
3871   "TARGET_P9_VECTOR"
3872 {
3873   if (!VECTOR_ELT_ORDER_BIG)
3874     {
3875       rtx op1 = operands[1];
3876       rtx v4si_tmp = gen_reg_rtx (V4SImode);
3877       emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
3878       operands[1] = v4si_tmp;
3879       operands[3] = GEN_INT (12 - INTVAL (operands[3]));
3880     }
3881 })
3882
3883 (define_insn "*vinsert4b_internal"
     ;; Insn for the vinsert4b expander.  Operand 2 is tied to the
     ;; destination ("0"), so only the destination, the V4SI source and
     ;; the byte offset are printed.
3884   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3885         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
3886                        (match_operand:V16QI 2 "vsx_register_operand" "0")
3887                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
3888                    UNSPEC_XXINSERTW))]
3889   "TARGET_P9_VECTOR"
3890   "xxinsertw %x0,%x1,%3"
3891   [(set_attr "type" "vecperm")])
3892
3893 (define_expand "vinsert4b_di"
     ;; Expander for UNSPEC_XXINSERTW taking a DImode source (operand 1), a
     ;; V16QI vector (operand 2) and a constant offset in 0..12 (operand 3).
3894   [(set (match_operand:V16QI 0 "vsx_register_operand")
3895         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand")
3896                        (match_operand:V16QI 2 "vsx_register_operand")
3897                        (match_operand:QI 3 "const_0_to_12_operand")]
3898                    UNSPEC_XXINSERTW))]
3899   "TARGET_P9_VECTOR"
3900 {
       /* Only the offset needs adjusting when the element order is not
          big-endian: mirror it within the 0..12 range.  */
3901   if (!VECTOR_ELT_ORDER_BIG)
         {
           HOST_WIDE_INT le_offset = INTVAL (operands[3]);
3902       operands[3] = GEN_INT (12 - le_offset);
         }
3903 })
3904
3905 (define_insn "*vinsert4b_di_internal"
     ;; Insn matching UNSPEC_XXINSERTW with a DImode source in operand 1.
     ;; Operand 2 is tied to the output register (matching constraint 0), and
     ;; operand 3 is the constant printed as the last xxinsertw argument.
3906   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3907         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj")
3908                        (match_operand:V16QI 2 "vsx_register_operand" "0")
3909                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
3910                    UNSPEC_XXINSERTW))]
3911   "TARGET_P9_VECTOR"
3912   "xxinsertw %x0,%x1,%3"
3913   [(set_attr "type" "vecperm")])
3914
3915 \f
3916 ;; Support for ISA 3.0 vector byte reverse
3917
3918 ;; Swap all bytes within a vector
     ;; (a bswap of the single V1TI element, emitted as xxbrq).
3919 (define_insn "p9_xxbrq_v1ti"
3920   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
3921         (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
3922   "TARGET_P9_VECTOR"
3923   "xxbrq %x0,%x1"
3924   [(set_attr "type" "vecperm")])
3925
3926 (define_expand "p9_xxbrq_v16qi"
     ;; V16QI entry point for the quadword byte reverse.  Operand 0 is the
     ;; destination and operand 1 the source; the work is done by
     ;; p9_xxbrq_v1ti on V1TI views of the data.
3927   [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
3928    (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
3929   "TARGET_P9_VECTOR"
3930 {
       /* Compute the result in a fresh V1TI pseudo and then copy it into
          operand 0, so that the bswap insn never has a SUBREG of the V16QI
          destination as its SET target.  */
3931   rtx op0 = gen_reg_rtx (V1TImode);
3932   rtx op1 = gen_lowpart (V1TImode, operands[1]);
3933   emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
       emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
3934   DONE;
3935 })
3936
3937 ;; Swap all bytes in each 64-bit element
     ;; RTL bswap is only defined for fixed-point modes, so the insn itself is
     ;; provided for V2DI only.  The V2DF variant is an expander that performs
     ;; the operation on V2DI views of its operands.
3938 (define_insn "p9_xxbrd_v2di"
3939   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
3940         (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
3941   "TARGET_P9_VECTOR"
3942   "xxbrd %x0,%x1"
3943   [(set_attr "type" "vecperm")])

     (define_expand "p9_xxbrd_v2df"
       [(use (match_operand:V2DF 0 "vsx_register_operand"))
        (use (match_operand:V2DF 1 "vsx_register_operand"))]
       "TARGET_P9_VECTOR"
     {
       /* Byte-reverse through a V2DI pseudo, then copy the result into the
          V2DF destination.  */
       rtx op0 = gen_reg_rtx (V2DImode);
       rtx op1 = gen_lowpart (V2DImode, operands[1]);
       emit_insn (gen_p9_xxbrd_v2di (op0, op1));
       emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
       DONE;
     })
3944
3945 ;; Swap all bytes in each 32-bit element
     ;; RTL bswap is only defined for fixed-point modes, so the insn itself is
     ;; provided for V4SI only.  The V4SF variant is an expander that performs
     ;; the operation on V4SI views of its operands.
3946 (define_insn "p9_xxbrw_v4si"
3947   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
3948         (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
3949   "TARGET_P9_VECTOR"
3950   "xxbrw %x0,%x1"
3951   [(set_attr "type" "vecperm")])

     (define_expand "p9_xxbrw_v4sf"
       [(use (match_operand:V4SF 0 "vsx_register_operand"))
        (use (match_operand:V4SF 1 "vsx_register_operand"))]
       "TARGET_P9_VECTOR"
     {
       /* Byte-reverse through a V4SI pseudo, then copy the result into the
          V4SF destination.  */
       rtx op0 = gen_reg_rtx (V4SImode);
       rtx op1 = gen_lowpart (V4SImode, operands[1]);
       emit_insn (gen_p9_xxbrw_v4si (op0, op1));
       emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
       DONE;
     })
3952
3953 ;; Swap all bytes in each 16-bit element
     ;; (a bswap of the V8HI vector, emitted as xxbrh).
3954 (define_insn "p9_xxbrh_v8hi"
3955   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3956         (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
3957   "TARGET_P9_VECTOR"
3958   "xxbrh %x0,%x1"
3959   [(set_attr "type" "vecperm")])
3960 \f
3961
3962 ;; Operand numbers for the following peephole2
     ;; Numbers 0-9 are matched by the peephole2 pattern itself; numbers 10-12
     ;; are filled in by its preparation code for use in the replacement.
3963 (define_constants
3964   [(SFBOOL_TMP_GPR               0)             ;; GPR temporary
3965    (SFBOOL_TMP_VSX               1)             ;; vector temporary
3966    (SFBOOL_MFVSR_D               2)             ;; move to gpr dest
3967    (SFBOOL_MFVSR_A               3)             ;; move to gpr src
3968    (SFBOOL_BOOL_D                4)             ;; and/ior/xor dest
3969    (SFBOOL_BOOL_A1               5)             ;; and/ior/xor arg1
3970    (SFBOOL_BOOL_A2               6)             ;; and/ior/xor arg2
3971    (SFBOOL_SHL_D                 7)             ;; shift left dest
3972    (SFBOOL_SHL_A                 8)             ;; shift left arg
3973    (SFBOOL_MTVSR_D               9)             ;; move to vector dest
3974    (SFBOOL_BOOL_A_DI            10)             ;; SFBOOL_BOOL_A1/A2 as DImode
3975    (SFBOOL_TMP_VSX_DI           11)             ;; SFBOOL_TMP_VSX as DImode
3976    (SFBOOL_MTVSR_D_V4SF         12)])           ;; SFBOOL_MTVSR_D as V4SFmode
3977
3978 ;; Attempt to optimize some common GLIBC operations using logical operations to
3979 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
3980 ;; after macro expansion that looks like:
3981 ;;
3982 ;;      typedef union {
3983 ;;        float value;
3984 ;;        uint32_t word;
3985 ;;      } ieee_float_shape_type;
3986 ;;
3987 ;;      float t1;
3988 ;;      int32_t is;
3989 ;;
3990 ;;      do {
3991 ;;        ieee_float_shape_type gf_u;
3992 ;;        gf_u.value = (t1);
3993 ;;        (is) = gf_u.word;
3994 ;;      } while (0);
3995 ;;
3996 ;;      do {
3997 ;;        ieee_float_shape_type sf_u;
3998 ;;        sf_u.word = (is & 0xfffff000);
3999 ;;        (t1) = sf_u.value;
4000 ;;      } while (0);
4001 ;;
4002 ;;
4003 ;; This would result in two direct move operations (convert to memory format,
4004 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
4005 ;; scalar format).  With this peephole, we eliminate the direct move to the
4006 ;; GPR, and instead move the integer mask value to the vector register after a
4007 ;; shift and do the VSX logical operation.
4008
4009 ;; The insns for dealing with SFmode in GPR registers look like:
4010 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
4011 ;;
4012 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
4013 ;;
4014 ;; (set (reg:DI reg3) (lshiftrt:DI (reg:DI reg3) (const_int 32)))
4015 ;;
4016 ;; (set (reg:DI reg5) (and:DI (reg:DI reg3) (reg:DI reg4)))
4017 ;;
4018 ;; (set (reg:DI reg6) (ashift:DI (reg:DI reg5) (const_int 32)))
4019 ;;
4020 ;; (set (reg:SF reg7) (unspec:SF [(reg:DI reg6)] UNSPEC_P8V_MTVSRD))
4021 ;;
4022 ;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg7)] UNSPEC_VSX_CVSPDPN))
4023
4024 (define_peephole2
     ;; Matches, in order, an MFVSRD, an SRDI by 32, an SImode and/ior/xor, an
     ;; SLDI by 32 and an MTVSRD, and also requests a DImode GPR scratch and a
     ;; V4SF VSX scratch.  The replacement shifts the other logical operand
     ;; left by 32 in the GPR scratch, copies it to the VSX scratch and does
     ;; the logical operation in V4SFmode on the vector registers.
4025   [(match_scratch:DI SFBOOL_TMP_GPR "r")
4026    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
4027
4028    ;; MFVSRD
4029    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
4030         (unspec:DI [(match_operand:V4SF SFBOOL_MFVSR_A "vsx_register_operand")]
4031                    UNSPEC_P8V_RELOAD_FROM_VSX))
4032
4033    ;; SRDI
4034    (set (match_dup SFBOOL_MFVSR_D)
4035         (lshiftrt:DI (match_dup SFBOOL_MFVSR_D)
4036                      (const_int 32)))
4037
4038    ;; AND/IOR/XOR operation on int
4039    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
4040         (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
4041                         (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
4042
4043    ;; SLDI
4044    (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
4045         (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
4046                    (const_int 32)))
4047
4048    ;; MTVSRD
4049    (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
4050         (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
4051
     ;; Condition: every matched register operand must be a plain REG, the
     ;; MFVSRD destination must be one of the logical operation's inputs, the
     ;; logical result must be the SLDI input, and the intermediate GPR values
     ;; must either be overwritten by the next step or be dead afterwards.
4052   "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
4053    /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
4054       to compare registers, when the mode is different.  */
4055    && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
4056    && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
4057    && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
4058    && (REG_P (operands[SFBOOL_BOOL_A2])
4059        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
4060    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
4061        || peep2_reg_dead_p (3, operands[SFBOOL_MFVSR_D]))
4062    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
4063        || (REG_P (operands[SFBOOL_BOOL_A2])
4064            && REGNO (operands[SFBOOL_MFVSR_D])
4065                 == REGNO (operands[SFBOOL_BOOL_A2])))
4066    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
4067    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
4068        || peep2_reg_dead_p (4, operands[SFBOOL_BOOL_D]))
4069    && peep2_reg_dead_p (5, operands[SFBOOL_SHL_D])"
     ;; Replacement: TMP_GPR = BOOL_A_DI << 32; TMP_VSX (as DImode) = TMP_GPR;
     ;; MTVSR_D (as V4SFmode) = MFVSR_A <and/ior/xor> TMP_VSX.
4070   [(set (match_dup SFBOOL_TMP_GPR)
4071         (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
4072                    (const_int 32)))
4073
4074    (set (match_dup SFBOOL_TMP_VSX_DI)
4075         (match_dup SFBOOL_TMP_GPR))
4076
4077    (set (match_dup SFBOOL_MTVSR_D_V4SF)
4078         (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A)
4079                           (match_dup SFBOOL_TMP_VSX)))]
     ;; Preparation code: fill in the operands that exist only in the
     ;; replacement.  A constant second logical operand is first loaded into
     ;; TMP_GPR, which then serves as BOOL_A_DI.  Otherwise BOOL_A_DI is
     ;; whichever of BOOL_A1/BOOL_A2 is not the MFVSRD destination register,
     ;; viewed in DImode.  TMP_VSX_DI and MTVSR_D_V4SF reuse the register
     ;; numbers of TMP_VSX and MTVSR_D in DImode and V4SFmode respectively.
4080 {
4081   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
4082   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
4083   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
4084   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
4085   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
4086
4087   if (CONST_INT_P (bool_a2))
4088     {
4089       rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
4090       emit_move_insn (tmp_gpr, bool_a2);
4091       operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
4092     }
4093   else
4094     {
4095       int regno_bool_a1 = REGNO (bool_a1);
4096       int regno_bool_a2 = REGNO (bool_a2);
4097       int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
4098                           ? regno_bool_a2 : regno_bool_a1);
4099       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
4100     }
4101
4102   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
4103   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
4104 })