gcc/config/powerpcspe/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Code iterator over the comparison codes eq, lt, gt and unordered
  22 (define_code_iterator CMP_TEST [eq lt gt unordered])
  23
  24 ;; Iterator for both scalar and vector floating point types supported by VSX
  25 (define_mode_iterator VSX_B [DF V4SF V2DF])
  26
  27 ;; Iterator for the two vector types with 64-bit elements
  28 (define_mode_iterator VSX_D [V2DF V2DI])
  29
  30 ;; Mode iterator to handle swapping words on little endian for the 128-bit
  31 ;; types that go in a single vector register.
     ;; KF and TF take part only when FLOAT128_VECTOR_P holds for that mode and
     ;; TI only when TARGET_VSX_TIMODE is set; V1TI is always included.
  32 (define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
  33                                   (TF   "FLOAT128_VECTOR_P (TFmode)")
  34                                   (TI   "TARGET_VSX_TIMODE")
  35                                   V1TI])
  36
  37 ;; Iterator for the two vector types with 32-bit elements
  38 (define_mode_iterator VSX_W [V4SF V4SI])
  39
  40 ;; Iterator for the DF types (vector V2DF and scalar DF)
  41 (define_mode_iterator VSX_DF [V2DF DF])
  42
  43 ;; Iterator for vector floating point types supported by VSX
  44 (define_mode_iterator VSX_F [V4SF V2DF])
  45
  46 ;; Iterator for logical types supported by VSX
     ;; TI is unconditional here; KF and TF are included only when
     ;; FLOAT128_VECTOR_P holds for the mode.
  47 (define_mode_iterator VSX_L [V16QI
  48                              V8HI
  49                              V4SI
  50                              V2DI
  51                              V4SF
  52                              V2DF
  53                              V1TI
  54                              TI
  55                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  56                              (TF        "FLOAT128_VECTOR_P (TFmode)")])
  57
  58 ;; Iterator for memory moves.
     ;; Covers the same modes as VSX_L, except that TI is included only when
     ;; TARGET_VSX_TIMODE is set.
  59 (define_mode_iterator VSX_M [V16QI
  60                              V8HI
  61                              V4SI
  62                              V2DI
  63                              V4SF
  64                              V2DF
  65                              V1TI
  66                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  67                              (TF        "FLOAT128_VECTOR_P (TFmode)")
  68                              (TI        "TARGET_VSX_TIMODE")])
  69
  70 ;; Map into the appropriate load/store name based on the type
     ;; V16QI, V8HI, V4SI and V4SF map to "vw4"; V2DF, V2DI and the 128-bit
     ;; modes (TF, KF, V1TI, TI) map to "vd2"; scalar DF maps to "d".
     ;; NOTE(review): presumably spliced into lx...x/stx...x mnemonics (compare
     ;; the literal lxvd2x/stxvd2x used further down) -- confirm at the users.
  71 (define_mode_attr VSm  [(V16QI "vw4")
  72                         (V8HI  "vw4")
  73                         (V4SI  "vw4")
  74                         (V4SF  "vw4")
  75                         (V2DF  "vd2")
  76                         (V2DI  "vd2")
  77                         (DF    "d")
  78                         (TF    "vd2")
  79                         (KF    "vd2")
  80                         (V1TI  "vd2")
  81                         (TI    "vd2")])
  82
  83 ;; Map into the appropriate suffix based on the type
     ;; "sp" for V16QI, V8HI, V4SI, V4SF and SF; "dp" for every other mode
     ;; listed.
  84 (define_mode_attr VSs   [(V16QI "sp")
  85                          (V8HI  "sp")
  86                          (V4SI  "sp")
  87                          (V4SF  "sp")
  88                          (V2DF  "dp")
  89                          (V2DI  "dp")
  90                          (DF    "dp")
  91                          (SF    "sp")
  92                          (TF    "dp")
  93                          (KF    "dp")
  94                          (V1TI  "dp")
  95                          (TI    "dp")])
  96
  97 ;; Map the register class used
     ;; This is the preferred class for each mode; VSa further down gives the
     ;; wider class the mode may also occupy.
  98 (define_mode_attr VSr   [(V16QI "v")
  99                          (V8HI  "v")
 100                          (V4SI  "v")
 101                          (V4SF  "wf")
 102                          (V2DI  "wd")
 103                          (V2DF  "wd")
 104                          (DI    "wi")
 105                          (DF    "ws")
 106                          (SF    "ww")
 107                          (TF    "wp")
 108                          (KF    "wq")
 109                          (V1TI  "v")
 110                          (TI    "wt")])
 111
 112 ;; Map the register class used for float<->int conversions (floating point side)
 113 ;; VSr2 is the preferred register class, VSr3 is any register class that will
 114 ;; hold the data
     ;; The two maps differ only for the vector modes: V2DF and V4SF use
     ;; "wd"/"wf" in VSr2 but "wa" in VSr3.
 115 (define_mode_attr VSr2  [(V2DF  "wd")
 116                          (V4SF  "wf")
 117                          (DF    "ws")
 118                          (SF    "ww")
 119                          (DI    "wi")
 120                          (KF    "wq")
 121                          (TF    "wp")])
 122
 123 (define_mode_attr VSr3  [(V2DF  "wa")
 124                          (V4SF  "wa")
 125                          (DF    "ws")
 126                          (SF    "ww")
 127                          (DI    "wi")
 128                          (KF    "wq")
 129                          (TF    "wp")])
 130
 131 ;; Map the register class for sp<->dp float conversions, destination
     ;; VSr4 and VSr5 agree for SF and DF and are mirror images for the vector
     ;; modes: V2DF/V4SF are "wd"/"v" here and "v"/"wd" in VSr5.
 132 (define_mode_attr VSr4  [(SF    "ws")
 133                          (DF    "f")
 134                          (V2DF  "wd")
 135                          (V4SF  "v")])
 136
 137 ;; Map the register class for sp<->dp float conversions, source
 138 (define_mode_attr VSr5  [(SF    "ws")
 139                          (DF    "f")
 140                          (V2DF  "v")
 141                          (V4SF  "wd")])
 142
 143 ;; The VSX register class that a type can occupy, even if it is not the
 144 ;; preferred register class (VSr is the preferred register class that will get
 145 ;; allocated first).
     ;; <VSa> is what the little endian permute load/store patterns in this
     ;; file use as the register operand constraint.
 146 (define_mode_attr VSa   [(V16QI "wa")
 147                          (V8HI  "wa")
 148                          (V4SI  "wa")
 149                          (V4SF  "wa")
 150                          (V2DI  "wa")
 151                          (V2DF  "wa")
 152                          (DI    "wi")
 153                          (DF    "ws")
 154                          (SF    "ww")
 155                          (V1TI  "wa")
 156                          (TI    "wt")
 157                          (TF    "wp")
 158                          (KF    "wq")])
 159
 160 ;; Same size integer type for floating point data
     ;; VSi gives the integer mode name in lower case and VSI gives the same
     ;; mapping in upper case.
 161 (define_mode_attr VSi [(V4SF  "v4si")
 162                        (V2DF  "v2di")
 163                        (DF    "di")])
 164
 165 (define_mode_attr VSI [(V4SF  "V4SI")
 166                        (V2DF  "V2DI")
 167                        (DF    "DI")])
 168
 169 ;; Word size for same size conversion
     ;; "w" for V4SF, "d" for V2DF and DF.
 170 (define_mode_attr VSc [(V4SF "w")
 171                        (V2DF "d")
 172                        (DF   "d")])
 173
 174 ;; Map into either s or v, depending on whether this is a scalar or vector
 175 ;; operation
     ;; Only DF maps to "s"; every other listed mode, including KF, maps to "v".
 176 (define_mode_attr VSv   [(V16QI "v")
 177                          (V8HI  "v")
 178                          (V4SI  "v")
 179                          (V4SF  "v")
 180                          (V2DI  "v")
 181                          (V2DF  "v")
 182                          (V1TI  "v")
 183                          (DF    "s")
 184                          (KF    "v")])
 185
 186 ;; Appropriate type for add ops (and other simple FP ops)
     ;; Each operation class below has two parallel maps keyed by the same three
     ;; modes (V2DF, V4SF, DF): VStype_<op> and VSfptype_<op>.
 187 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 188                                  (V4SF "vecfloat")
 189                                  (DF   "fp")])
 190
 191 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 192                                    (V4SF "fp_addsub_s")
 193                                    (DF   "fp_addsub_d")])
 194
 195 ;; Appropriate type for multiply ops
 196 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 197                                  (V4SF "vecfloat")
 198                                  (DF   "dmul")])
 199
 200 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 201                                  (V4SF "fp_mul_s")
 202                                  (DF   "fp_mul_d")])
 203
 204 ;; Appropriate type for divide ops.
 205 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 206                                  (V4SF "vecfdiv")
 207                                  (DF   "ddiv")])
 208
 209 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 210                                  (V4SF "fp_div_s")
 211                                  (DF   "fp_div_d")])
 212
 213 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 214 ;; the scalar sqrt
     ;; That is why V2DF and V4SF use the scalar-looking "dsqrt"/"ssqrt" values.
 215 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 216                                  (V4SF "ssqrt")
 217                                  (DF   "dsqrt")])
 218
 219 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 220                                  (V4SF "fp_sqrt_s")
 221                                  (DF   "fp_sqrt_d")])
 222
 223 ;; Iterator and modes for sp<->dp conversions
 224 ;; Because scalar SF values are represented internally as double, use the
 225 ;; V4SF type rather than SF to represent the single precision side.
     ;; For each source mode in VSX_SPDP, VS_spdp_res gives the result mode,
     ;; VS_spdp_insn the conversion instruction name and VS_spdp_type the
     ;; matching type name.
 226 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 227
 228 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 229                                (V4SF    "V2DF")
 230                                (V2DF    "V4SF")])
 231
 232 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 233                                 (V4SF   "xvcvspdp")
 234                                 (V2DF   "xvcvdpsp")])
 235
 236 (define_mode_attr VS_spdp_type [(DF     "fp")
 237                                 (V4SF   "vecdouble")
 238                                 (V2DF   "vecdouble")])
 239
 240 ;; Map the scalar mode for a vector type
     ;; That is, the mode of a single element of the vector.
 241 (define_mode_attr VS_scalar [(V1TI      "TI")
 242                              (V2DF      "DF")
 243                              (V2DI      "DI")
 244                              (V4SF      "SF")
 245                              (V4SI      "SI")
 246                              (V8HI      "HI")
 247                              (V16QI     "QI")])
 248
 249 ;; Map to a double-sized vector mode
     ;; Same element mode, twice as many elements (for example V4SI -> V8SI).
 250 (define_mode_attr VS_double [(V4SI      "V8SI")
 251                              (V4SF      "V8SF")
 252                              (V2DI      "V4DI")
 253                              (V2DF      "V4DF")
 254                              (V1TI      "V2TI")])
 255
 256 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 257 ;; to/from gprs
     ;; Defined only for the two modes with 64-bit elements, V2DF and V2DI.
 258 (define_mode_attr VS_64dm [(V2DF        "wk")
 259                            (V2DI        "wj")])
 260
 261 ;; Map register class for 64-bit element in 128-bit vector for normal register
 262 ;; to register moves
 263 (define_mode_attr VS_64reg [(V2DF       "ws")
 264                             (V2DI       "wi")])
 265
 266 ;; Iterators for loading constants with xxspltib
     ;; VSINT_84 mixes vector (V4SI, V2DI) and scalar (DI, SI) integer modes;
     ;; VSINT_842 holds vector modes only.
 267 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 268 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 269
 270 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
 271 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
 272 ;; done on ISA 2.07 and not just ISA 3.0.
 273 (define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
 274 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
 275
     ;; Element width letter for each mode in VSX_EXTRACT_I: "b", "h" or "w".
 276 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
 277                                      (V8HI "h")
 278                                      (V4SI "w")])
 279
 280 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
 281 ;; insert to validate the operand number.
     ;; The accepted range follows the element count of the mode: 0..15 for
     ;; V16QI, 0..7 for V8HI and 0..3 for V4SI.
 282 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
 283                                          (V8HI  "const_0_to_7_operand")
 284                                          (V4SI  "const_0_to_3_operand")])
 285
 286 ;; Mode attribute to give the constraint for vector extract and insert
 287 ;; operations.
     ;; V16QI and V8HI use "v"; V4SI uses "wa".
 288 (define_mode_attr VSX_EX [(V16QI "v")
 289                           (V8HI  "v")
 290                           (V4SI  "wa")])
 291
 292 ;; Mode iterator for binary floating types other than double to
 293 ;; optimize convert to that floating point type from an extract
 294 ;; of an integer type
     ;; SF is unconditional.  IF requires FLOAT128_2REG_P, KF requires
     ;; TARGET_FLOAT128_HW, and TF is accepted either as a two-register type or
     ;; as an IEEE type with TARGET_FLOAT128_HW.
 295 (define_mode_iterator VSX_EXTRACT_FL [SF
 296                                       (IF "FLOAT128_2REG_P (IFmode)")
 297                                       (KF "TARGET_FLOAT128_HW")
 298                                       (TF "FLOAT128_2REG_P (TFmode)
 299                                            || (FLOAT128_IEEE_P (TFmode)
 300                                                && TARGET_FLOAT128_HW)")])
 301
 302 ;; Mode iterator for binary floating types that have a direct conversion
 303 ;; from 64-bit integer to floating point
     ;; SF and DF are unconditional; KF requires TARGET_FLOAT128_HW and TF
     ;; additionally requires FLOAT128_IEEE_P (TFmode).
 304 (define_mode_iterator FL_CONV [SF
 305                                DF
 306                                (KF "TARGET_FLOAT128_HW")
 307                                (TF "TARGET_FLOAT128_HW
 308                                     && FLOAT128_IEEE_P (TFmode)")])
 309
 310 ;; Iterator for the two short-element vector types to do a splat from an
     ;; integer (byte and halfword elements)
 311 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 312
 313 ;; Mode attribute to give the count for the splat instruction to splat
 314 ;; the value in the 64-bit integer slot
     ;; "7" for V16QI and "3" for V8HI.
 315 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
 316
 317 ;; Mode attribute to give the suffix for the splat instruction
     ;; "b" for V16QI and "h" for V8HI.
 318 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
 319
 320 ;; Constants for creating unspecs
     ;; These enumerators are added to the C enum named "unspec".  None of the
     ;; patterns in the little endian permute section below use them; those
     ;; are written with vec_select and rotate instead.
 321 (define_c_enum "unspec"
 322   [UNSPEC_VSX_CONCAT
 323    UNSPEC_VSX_CVDPSXWS
 324    UNSPEC_VSX_CVDPUXWS
 325    UNSPEC_VSX_CVSPDP
 326    UNSPEC_VSX_CVSPDPN
 327    UNSPEC_VSX_CVDPSPN
 328    UNSPEC_VSX_CVSXWDP
 329    UNSPEC_VSX_CVUXWDP
 330    UNSPEC_VSX_CVSXDSP
 331    UNSPEC_VSX_CVUXDSP
 332    UNSPEC_VSX_CVSPSXDS
 333    UNSPEC_VSX_CVSPUXDS
 334    UNSPEC_VSX_TDIV
 335    UNSPEC_VSX_TSQRT
 336    UNSPEC_VSX_SET
 337    UNSPEC_VSX_ROUND_I
 338    UNSPEC_VSX_ROUND_IC
 339    UNSPEC_VSX_SLDWI
 340    UNSPEC_VSX_XXSPLTW
 341    UNSPEC_VSX_XXSPLTD
 342    UNSPEC_VSX_DIVSD
 343    UNSPEC_VSX_DIVUD
 344    UNSPEC_VSX_MULSD
 345    UNSPEC_VSX_XVCVSXDDP
 346    UNSPEC_VSX_XVCVUXDDP
 347    UNSPEC_VSX_XVCVDPSXDS
 348    UNSPEC_VSX_XVCVDPUXDS
 349    UNSPEC_VSX_SIGN_EXTEND
 350    UNSPEC_VSX_VSLO
 351    UNSPEC_VSX_EXTRACT
 352    UNSPEC_VSX_SXEXPDP
 353    UNSPEC_VSX_SXSIGDP
 354    UNSPEC_VSX_SIEXPDP
 355    UNSPEC_VSX_SCMPEXPDP
 356    UNSPEC_VSX_STSTDC
 357    UNSPEC_VSX_VXEXP
 358    UNSPEC_VSX_VXSIG
 359    UNSPEC_VSX_VIEXP
 360    UNSPEC_VSX_VTSTDC
 361    UNSPEC_VSX_VEC_INIT
 362    UNSPEC_LXVL
 363    UNSPEC_STXVL
 364    UNSPEC_VCLZLSBB
 365    UNSPEC_VCTZLSBB
 366    UNSPEC_VEXTUBLX
 367    UNSPEC_VEXTUHLX
 368    UNSPEC_VEXTUWLX
 369    UNSPEC_VEXTUBRX
 370    UNSPEC_VEXTUHRX
 371    UNSPEC_VEXTUWRX
 372    UNSPEC_VCMPNEB
 373    UNSPEC_VCMPNEZB
 374    UNSPEC_VCMPNEH
 375    UNSPEC_VCMPNEZH
 376    UNSPEC_VCMPNEW
 377    UNSPEC_VCMPNEZW
 378    UNSPEC_XXEXTRACTUW
 379    UNSPEC_XXINSERTW
 380   ])
 381
 382 ;; VSX moves
 383
 384 ;; The patterns for LE permuted loads and stores come before the general
 385 ;; VSX moves so they match first.
     ;;
     ;; Little endian load of a two-element vector (VSX_D) when
     ;; TARGET_P9_VECTOR is not available.  The insn emits "#" and is split,
     ;; under the same condition it matches with, into two element swaps:
     ;; memory -> operand 2, then operand 2 -> operand 0.  Operand 2 is a new
     ;; pseudo when one can still be created; otherwise the destination
     ;; register doubles as the intermediate.  The length of 8 covers the two
     ;; sets the split produces.
 386 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 387   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
 388         (match_operand:VSX_D 1 "memory_operand" "Z"))]
 389   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 390   "#"
 391   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 392   [(set (match_dup 2)
 393         (vec_select:<MODE>
 394           (match_dup 1)
 395           (parallel [(const_int 1) (const_int 0)])))
 396    (set (match_dup 0)
 397         (vec_select:<MODE>
 398           (match_dup 2)
 399           (parallel [(const_int 1) (const_int 0)])))]
 400   "
 401 {
 402   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 403                                        : operands[0];
 404 }
 405   "
 406   [(set_attr "type" "vecload")
 407    (set_attr "length" "8")])
 408
     ;; Same scheme for the four-element vectors (VSX_W).  The selector
     ;; [2 3 0 1] exchanges the two halves of the vector, and applying it twice
     ;; (memory -> operand 2 -> operand 0) restores the original element order.
     ;; Operand 2 is a new pseudo when possible, else the destination itself.
 409 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 410   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 411         (match_operand:VSX_W 1 "memory_operand" "Z"))]
 412   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 413   "#"
 414   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 415   [(set (match_dup 2)
 416         (vec_select:<MODE>
 417           (match_dup 1)
 418           (parallel [(const_int 2) (const_int 3)
 419                      (const_int 0) (const_int 1)])))
 420    (set (match_dup 0)
 421         (vec_select:<MODE>
 422           (match_dup 2)
 423           (parallel [(const_int 2) (const_int 3)
 424                      (const_int 0) (const_int 1)])))]
 425   "
 426 {
 427   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 428                                        : operands[0];
 429 }
 430   "
 431   [(set_attr "type" "vecload")
 432    (set_attr "length" "8")])
 433
     ;; V8HI variant of the little endian permuted load.  The selector
     ;; [4..7 0..3] exchanges the two halves of the eight elements; it is
     ;; applied once from memory into operand 2 and once more into operand 0.
     ;; The register constraint is the literal "wa" rather than <VSa>.
 434 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 435   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 436         (match_operand:V8HI 1 "memory_operand" "Z"))]
 437   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 438   "#"
 439   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 440   [(set (match_dup 2)
 441         (vec_select:V8HI
 442           (match_dup 1)
 443           (parallel [(const_int 4) (const_int 5)
 444                      (const_int 6) (const_int 7)
 445                      (const_int 0) (const_int 1)
 446                      (const_int 2) (const_int 3)])))
 447    (set (match_dup 0)
 448         (vec_select:V8HI
 449           (match_dup 2)
 450           (parallel [(const_int 4) (const_int 5)
 451                      (const_int 6) (const_int 7)
 452                      (const_int 0) (const_int 1)
 453                      (const_int 2) (const_int 3)])))]
 454   "
 455 {
 456   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 457                                        : operands[0];
 458 }
 459   "
 460   [(set_attr "type" "vecload")
 461    (set_attr "length" "8")])
 462
     ;; V16QI variant of the little endian permuted load.  The selector
     ;; [8..15 0..7] exchanges the two halves of the sixteen elements; it is
     ;; applied once from memory into operand 2 and once more into operand 0.
 463 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 464   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 465         (match_operand:V16QI 1 "memory_operand" "Z"))]
 466   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 467   "#"
 468   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 469   [(set (match_dup 2)
 470         (vec_select:V16QI
 471           (match_dup 1)
 472           (parallel [(const_int 8) (const_int 9)
 473                      (const_int 10) (const_int 11)
 474                      (const_int 12) (const_int 13)
 475                      (const_int 14) (const_int 15)
 476                      (const_int 0) (const_int 1)
 477                      (const_int 2) (const_int 3)
 478                      (const_int 4) (const_int 5)
 479                      (const_int 6) (const_int 7)])))
 480    (set (match_dup 0)
 481         (vec_select:V16QI
 482           (match_dup 2)
 483           (parallel [(const_int 8) (const_int 9)
 484                      (const_int 10) (const_int 11)
 485                      (const_int 12) (const_int 13)
 486                      (const_int 14) (const_int 15)
 487                      (const_int 0) (const_int 1)
 488                      (const_int 2) (const_int 3)
 489                      (const_int 4) (const_int 5)
 490                      (const_int 6) (const_int 7)])))]
 491   "
 492 {
 493   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 494                                        : operands[0];
 495 }
 496   "
 497   [(set_attr "type" "vecstore")
 498    (set_attr "length" "8")])
 499
     ;; Little endian store of a two-element vector (VSX_D) when
     ;; TARGET_P9_VECTOR is not available.  The insn only emits "#"; the real
     ;; instructions come from the two define_splits that follow.  Operand 1 is
     ;; marked "+" because the post-reload split rewrites the source register
     ;; in place.  The length of 12 matches the three sets of that split.
 500 (define_insn "*vsx_le_perm_store_<mode>"
 501   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
 502         (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
 503   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 504   "#"
 505   [(set_attr "type" "vecstore")
 506    (set_attr "length" "12")])
 507
     ;; Before reload has completed: swap the source into operand 2 (a new
     ;; pseudo when one can be created, otherwise the source register itself)
     ;; and store operand 2 through a second swap.
 508 (define_split
 509   [(set (match_operand:VSX_D 0 "memory_operand" "")
 510         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 511   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 512   [(set (match_dup 2)
 513         (vec_select:<MODE>
 514           (match_dup 1)
 515           (parallel [(const_int 1) (const_int 0)])))
 516    (set (match_dup 0)
 517         (vec_select:<MODE>
 518           (match_dup 2)
 519           (parallel [(const_int 1) (const_int 0)])))]
 520 {
 521   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 522                                        : operands[1];
 523 })
 524
 525 ;; The post-reload split requires that we re-permute the source
 526 ;; register in case it is still live.
     ;; Sequence: swap operand 1 in place, store it through a swap, then swap
     ;; operand 1 again so it holds its original value.
 527 (define_split
 528   [(set (match_operand:VSX_D 0 "memory_operand" "")
 529         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 530   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 531   [(set (match_dup 1)
 532         (vec_select:<MODE>
 533           (match_dup 1)
 534           (parallel [(const_int 1) (const_int 0)])))
 535    (set (match_dup 0)
 536         (vec_select:<MODE>
 537           (match_dup 1)
 538           (parallel [(const_int 1) (const_int 0)])))
 539    (set (match_dup 1)
 540         (vec_select:<MODE>
 541           (match_dup 1)
 542           (parallel [(const_int 1) (const_int 0)])))]
 543   "")
 544
     ;; Little endian store of a four-element vector (VSX_W) when
     ;; TARGET_P9_VECTOR is not available.  The insn only emits "#"; the two
     ;; define_splits that follow generate the instructions.  Operand 1 is
     ;; marked "+" because the post-reload split rewrites it in place.
 545 (define_insn "*vsx_le_perm_store_<mode>"
 546   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 547         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
 548   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 549   "#"
 550   [(set_attr "type" "vecstore")
 551    (set_attr "length" "12")])
 552
     ;; Before reload has completed: exchange the two halves of the source into
     ;; operand 2 (a new pseudo when one can be created, otherwise the source
     ;; register itself) and store operand 2 through the same exchange.
 553 (define_split
 554   [(set (match_operand:VSX_W 0 "memory_operand" "")
 555         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 556   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 557   [(set (match_dup 2)
 558         (vec_select:<MODE>
 559           (match_dup 1)
 560           (parallel [(const_int 2) (const_int 3)
 561                      (const_int 0) (const_int 1)])))
 562    (set (match_dup 0)
 563         (vec_select:<MODE>
 564           (match_dup 2)
 565           (parallel [(const_int 2) (const_int 3)
 566                      (const_int 0) (const_int 1)])))]
 567 {
 568   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 569                                        : operands[1];
 570 })
 571
 572 ;; The post-reload split requires that we re-permute the source
 573 ;; register in case it is still live.
     ;; Sequence: permute operand 1 in place, store it through the permute,
     ;; then permute operand 1 again so it holds its original value.
 574 (define_split
 575   [(set (match_operand:VSX_W 0 "memory_operand" "")
 576         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 577   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 578   [(set (match_dup 1)
 579         (vec_select:<MODE>
 580           (match_dup 1)
 581           (parallel [(const_int 2) (const_int 3)
 582                      (const_int 0) (const_int 1)])))
 583    (set (match_dup 0)
 584         (vec_select:<MODE>
 585           (match_dup 1)
 586           (parallel [(const_int 2) (const_int 3)
 587                      (const_int 0) (const_int 1)])))
 588    (set (match_dup 1)
 589         (vec_select:<MODE>
 590           (match_dup 1)
 591           (parallel [(const_int 2) (const_int 3)
 592                      (const_int 0) (const_int 1)])))]
 593   "")
 594
     ;; Little endian store of V8HI when TARGET_P9_VECTOR is not available.
     ;; The insn only emits "#"; the two define_splits that follow generate
     ;; the instructions.  Operand 1 is marked "+" because the post-reload
     ;; split rewrites it in place.
 595 (define_insn "*vsx_le_perm_store_v8hi"
 596   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 597         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 598   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 599   "#"
 600   [(set_attr "type" "vecstore")
 601    (set_attr "length" "12")])
 602
     ;; Before reload has completed: exchange the two halves of the source into
     ;; operand 2 (a new pseudo when one can be created, otherwise the source
     ;; register itself) and store operand 2 through the same exchange.
 603 (define_split
 604   [(set (match_operand:V8HI 0 "memory_operand" "")
 605         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 606   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 607   [(set (match_dup 2)
 608         (vec_select:V8HI
 609           (match_dup 1)
 610           (parallel [(const_int 4) (const_int 5)
 611                      (const_int 6) (const_int 7)
 612                      (const_int 0) (const_int 1)
 613                      (const_int 2) (const_int 3)])))
 614    (set (match_dup 0)
 615         (vec_select:V8HI
 616           (match_dup 2)
 617           (parallel [(const_int 4) (const_int 5)
 618                      (const_int 6) (const_int 7)
 619                      (const_int 0) (const_int 1)
 620                      (const_int 2) (const_int 3)])))]
 621 {
 622   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 623                                        : operands[1];
 624 })
 625
 626 ;; The post-reload split requires that we re-permute the source
 627 ;; register in case it is still live.
     ;; Sequence: permute operand 1 in place, store it through the permute,
     ;; then permute operand 1 again so it holds its original value.
 628 (define_split
 629   [(set (match_operand:V8HI 0 "memory_operand" "")
 630         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 631   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 632   [(set (match_dup 1)
 633         (vec_select:V8HI
 634           (match_dup 1)
 635           (parallel [(const_int 4) (const_int 5)
 636                      (const_int 6) (const_int 7)
 637                      (const_int 0) (const_int 1)
 638                      (const_int 2) (const_int 3)])))
 639    (set (match_dup 0)
 640         (vec_select:V8HI
 641           (match_dup 1)
 642           (parallel [(const_int 4) (const_int 5)
 643                      (const_int 6) (const_int 7)
 644                      (const_int 0) (const_int 1)
 645                      (const_int 2) (const_int 3)])))
 646    (set (match_dup 1)
 647         (vec_select:V8HI
 648           (match_dup 1)
 649           (parallel [(const_int 4) (const_int 5)
 650                      (const_int 6) (const_int 7)
 651                      (const_int 0) (const_int 1)
 652                      (const_int 2) (const_int 3)])))]
 653   "")
 654
     ;; Little endian store of V16QI when TARGET_P9_VECTOR is not available.
     ;; The insn only emits "#"; the two define_splits that follow generate
     ;; the instructions.  Operand 1 is marked "+" because the post-reload
     ;; split rewrites it in place.
 655 (define_insn "*vsx_le_perm_store_v16qi"
 656   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
 657         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 658   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 659   "#"
 660   [(set_attr "type" "vecstore")
 661    (set_attr "length" "12")])
 662
     ;; Before reload has completed: exchange the two halves of the source into
     ;; operand 2 (a new pseudo when one can be created, otherwise the source
     ;; register itself) and store operand 2 through the same exchange.
 663 (define_split
 664   [(set (match_operand:V16QI 0 "memory_operand" "")
 665         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 666   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 667   [(set (match_dup 2)
 668         (vec_select:V16QI
 669           (match_dup 1)
 670           (parallel [(const_int 8) (const_int 9)
 671                      (const_int 10) (const_int 11)
 672                      (const_int 12) (const_int 13)
 673                      (const_int 14) (const_int 15)
 674                      (const_int 0) (const_int 1)
 675                      (const_int 2) (const_int 3)
 676                      (const_int 4) (const_int 5)
 677                      (const_int 6) (const_int 7)])))
 678    (set (match_dup 0)
 679         (vec_select:V16QI
 680           (match_dup 2)
 681           (parallel [(const_int 8) (const_int 9)
 682                      (const_int 10) (const_int 11)
 683                      (const_int 12) (const_int 13)
 684                      (const_int 14) (const_int 15)
 685                      (const_int 0) (const_int 1)
 686                      (const_int 2) (const_int 3)
 687                      (const_int 4) (const_int 5)
 688                      (const_int 6) (const_int 7)])))]
 689 {
 690   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 691                                        : operands[1];
 692 })
 693
 694 ;; The post-reload split requires that we re-permute the source
 695 ;; register in case it is still live.
     ;; Sequence: permute operand 1 in place, store it through the permute,
     ;; then permute operand 1 again so it holds its original value.
 696 (define_split
 697   [(set (match_operand:V16QI 0 "memory_operand" "")
 698         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 699   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 700   [(set (match_dup 1)
 701         (vec_select:V16QI
 702           (match_dup 1)
 703           (parallel [(const_int 8) (const_int 9)
 704                      (const_int 10) (const_int 11)
 705                      (const_int 12) (const_int 13)
 706                      (const_int 14) (const_int 15)
 707                      (const_int 0) (const_int 1)
 708                      (const_int 2) (const_int 3)
 709                      (const_int 4) (const_int 5)
 710                      (const_int 6) (const_int 7)])))
 711    (set (match_dup 0)
 712         (vec_select:V16QI
 713           (match_dup 1)
 714           (parallel [(const_int 8) (const_int 9)
 715                      (const_int 10) (const_int 11)
 716                      (const_int 12) (const_int 13)
 717                      (const_int 14) (const_int 15)
 718                      (const_int 0) (const_int 1)
 719                      (const_int 2) (const_int 3)
 720                      (const_int 4) (const_int 5)
 721                      (const_int 6) (const_int 7)])))
 722    (set (match_dup 1)
 723         (vec_select:V16QI
 724           (match_dup 1)
 725           (parallel [(const_int 8) (const_int 9)
 726                      (const_int 10) (const_int 11)
 727                      (const_int 12) (const_int 13)
 728                      (const_int 14) (const_int 15)
 729                      (const_int 0) (const_int 1)
 730                      (const_int 2) (const_int 3)
 731                      (const_int 4) (const_int 5)
 732                      (const_int 6) (const_int 7)])))]
 733   "")
 734
 735 ;; Little endian word swapping for 128-bit types that are either scalars or the
 736 ;; special V1TI container class, for which it is not appropriate to use
 737 ;; vec_select.
     ;; The swap is written as a rotate of the 128-bit value by 64 bits.  The
     ;; three alternatives are register to register (xxpermdi), memory to
     ;; register (lxvd2x) and register to memory (stxvd2x); each is one
     ;; instruction of length 4.
 738 (define_insn "*vsx_le_permute_<mode>"
 739   [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
 740         (rotate:VSX_LE_128
 741          (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
 742          (const_int 64)))]
 743   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 744   "@
 745    xxpermdi %x0,%x1,%x1,2
 746    lxvd2x %x0,%y1
 747    stxvd2x %x1,%y0"
 748   [(set_attr "length" "4")
 749    (set_attr "type" "vecperm,vecload,vecstore")])
 750
     ;; Two successive rotates by 64 bits of a 128-bit value cancel out, so
     ;; this pattern is split into a plain register move.  In the first
     ;; alternative the input is tied to the output ("0"), nothing needs to be
     ;; emitted (length 0), and once reload has completed the split deletes
     ;; the insn outright when both operands are the same register.  The
     ;; second alternative is a register copy; xxlor takes a target and two
     ;; source registers, so the source is named twice.  Unlike the
     ;; neighbouring patterns, the condition does not test TARGET_P9_VECTOR.
 751 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
 752   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
 753         (rotate:VSX_LE_128
 754          (rotate:VSX_LE_128
 755           (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
 756           (const_int 64))
 757          (const_int 64)))]
 758   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 759   "@
 760    #
 761    xxlor %x0,%x1,%x1"
 762   ""
 763   [(set (match_dup 0) (match_dup 1))]
 764 {
 765   if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
 766     {
 767       emit_note (NOTE_INSN_DELETED);
 768       DONE;
 769     }
 770 }
 771   [(set_attr "length" "0,4")
 772    (set_attr "type" "veclogical")])
 773
     ;; Little endian load of a 128-bit scalar or V1TI value (VSX_LE_128) when
     ;; TARGET_P9_VECTOR is not available.  The insn emits "#" and is split
     ;; into two rotates by 64 bits: memory -> operand 2, then operand 2 ->
     ;; operand 0.  Operand 2 is a new pseudo when one can still be created;
     ;; otherwise the destination register doubles as the intermediate.
 774 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 775   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
 776         (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
 777   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 778   "#"
 779   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 780   [(set (match_dup 2)
 781         (rotate:VSX_LE_128 (match_dup 1)
 782                            (const_int 64)))
 783    (set (match_dup 0)
 784         (rotate:VSX_LE_128 (match_dup 2)
 785                            (const_int 64)))]
 786   "
 787 {
 788   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 789                                        : operands[0];
 790 }
 791   "
 792   [(set_attr "type" "vecload")
 793    (set_attr "length" "8")])
 794
 795 (define_insn "*vsx_le_perm_store_<mode>"
     ;; Store of a 128-bit VSX register to memory on little endian when
     ;; TARGET_P9_VECTOR is not set.  The template is "#", so the insn has to
     ;; be split; the define_splits that follow match this same pattern.
     ;; Operand 1 is marked "+" (read and written), presumably because the
     ;; post-reload split rotates the source register in place.
 796   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
 797         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
 798   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 799   "#"
 800   [(set_attr "type" "vecstore")
 801    (set_attr "length" "12")])
 802
 803 (define_split
     ;; Split of a 128-bit register-to-memory store before reload has
     ;; completed: rotate the source by 64 bits into operand 2, then store
     ;; operand 2 with a second 64-bit rotate.  Operand 2 is a new pseudo
     ;; (its mode and attributes taken from operand 0) while pseudos can be
     ;; created, otherwise operand 0 is used.
 804   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 805         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 806   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
 807   [(set (match_dup 2)
 808         (rotate:VSX_LE_128 (match_dup 1)
 809                            (const_int 64)))
 810    (set (match_dup 0)
 811         (rotate:VSX_LE_128 (match_dup 2)
 812                            (const_int 64)))]
 813 {
 814   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 815                                        : operands[0];
 816 })
 817
 818 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
 819 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
 820 ;; floating point are handled by the more generic swap elimination pass.
 821 (define_peephole2
     ;; Matches a 64-bit rotate of TImode register 1 into register 0 that is
     ;; immediately followed by a 64-bit rotate of register 0 into register 2,
     ;; and replaces the pair with a single copy of register 1 to register 2.
     ;; This is only done when register 0 is the same as register 2 or is
     ;; dead after the second insn, so the intermediate value is not needed.
 822   [(set (match_operand:TI 0 "vsx_register_operand" "")
 823         (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
 824                    (const_int 64)))
 825    (set (match_operand:TI 2 "vsx_register_operand" "")
 826         (rotate:TI (match_dup 0)
 827                    (const_int 64)))]
 828   "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
 829    && (rtx_equal_p (operands[0], operands[2])
 830        || peep2_reg_dead_p (2, operands[0]))"
 831    [(set (match_dup 2) (match_dup 1))])
 832
 833 ;; The post-reload split requires that we re-permute the source
 834 ;; register in case it is still live.
 835 (define_split
     ;; Split of a 128-bit register-to-memory store once reload has
     ;; completed, when no new temporary can be allocated.  Three 64-bit
     ;; rotates are emitted: rotate the source register in place, store it
     ;; to memory with a rotate, then rotate the source register in place
     ;; again so it holds its original value.
 836   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 837         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 838   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
 839   [(set (match_dup 1)
 840         (rotate:VSX_LE_128 (match_dup 1)
 841                            (const_int 64)))
 842    (set (match_dup 0)
 843         (rotate:VSX_LE_128 (match_dup 1)
 844                            (const_int 64)))
 845    (set (match_dup 1)
 846         (rotate:VSX_LE_128 (match_dup 1)
 847                            (const_int 64)))]
 848   "")
 849
 850 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 851 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
 852 (define_insn "xxspltib_v16qi"
     ;; Duplicate a constant accepted by s8bit_cint_operand into all sixteen
     ;; bytes of a V16QI register.  The value is masked to its low 8 bits
     ;; (stored in operands[2]) before being printed as the xxspltib
     ;; immediate.
 853   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 854         (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
 855   "TARGET_P9_VECTOR"
 856 {
 857   operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
 858   return "xxspltib %x0,%2";
 859 }
 860   [(set_attr "type" "vecperm")])
 861
 862 (define_insn "xxspltib_<mode>_nosplit"
     ;; Load a vector constant that needs only a single xxspltib.  The output
     ;; code asks xxspltib_constant_p for the splat value and the instruction
     ;; count, and aborts via gcc_unreachable if the constant is rejected or
     ;; the count is not 1.  The low 8 bits of the value are printed as the
     ;; immediate.
 863   [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
 864         (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
 865   "TARGET_P9_VECTOR"
 866 {
 867   rtx op1 = operands[1];
 868   int value = 256;
 869   int num_insns = -1;
 870
 871   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 872       || num_insns != 1)
 873     gcc_unreachable ();
 874
 875   operands[2] = GEN_INT (value & 0xff);
 876   return "xxspltib %x0,%2";
 877 }
 878   [(set_attr "type" "vecperm")])
 879
 880 (define_insn_and_split "*xxspltib_<mode>_split"
     ;; Load a vector constant that needs two instructions.  The split first
     ;; emits xxspltib_v16qi into a V16QI temporary (a new pseudo when one
     ;; can be created, otherwise the destination viewed as V16QI), then
     ;; widens the bytes into the destination: a QI sign extension for V2DI
     ;; and V4SI, or altivec_vupkhsb for V8HI.  xxspltib_constant_p must
     ;; report exactly 2 instructions or gcc_unreachable is hit.
 881   [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
 882         (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
 883   "TARGET_P9_VECTOR"
 884   "#"
 885   "&& 1"
 886   [(const_int 0)]
 887 {
 888   int value = 256;
 889   int num_insns = -1;
 890   rtx op0 = operands[0];
 891   rtx op1 = operands[1];
 892   rtx tmp = ((can_create_pseudo_p ())
 893              ? gen_reg_rtx (V16QImode)
 894              : gen_lowpart (V16QImode, op0));
 895
 896   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 897       || num_insns != 2)
 898     gcc_unreachable ();
 899
 900   emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
 901
 902   if (<MODE>mode == V2DImode)
 903     emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
 904
 905   else if (<MODE>mode == V4SImode)
 906     emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
 907
 908   else if (<MODE>mode == V8HImode)
 909     emit_insn (gen_altivec_vupkhsb  (op0, tmp));
 910
 911   else
 912     gcc_unreachable ();
 913
 914   DONE;
 915 }
 916   [(set_attr "type" "vecperm")
 917    (set_attr "length" "8")])
 918
 919
 920 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
 921 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
 922 ;; all 1's, since the machine does not have to wait for the previous
 923 ;; instruction using the register being set (such as a store waiting on a slow
 924 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
 925
 926 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 927 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
 928 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  STVX (VMX)  LVX (VMX)
 929 (define_insn "*vsx_mov<mode>_64bit"
     ;; 128-bit move for VSX_M modes when TARGET_POWERPC64 is set.  At least
     ;; one of the two operands must be a register.  There are 18
     ;; alternatives; the "type" and "length" attribute lists below give one
     ;; entry per alternative, in the same order as the constraint lists.
     ;; The assembly text is produced by rs6000_output_move_128bit.
 930   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
 931                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
 932                 ?&r,       ??r,       ??Y,       ??r,       wo,        v,
 933                 ?<VSa>,    *r,        v,         ??r,       wZ,        v")
 934
 935         (match_operand:VSX_M 1 "input_operand"
 936                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
 937                 wQ,        Y,         r,         r,         wE,        jwM,
 938                 ?jwM,      jwM,       W,         W,         v,         wZ"))]
 939
 940   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
 941    && (register_operand (operands[0], <MODE>mode)
 942        || register_operand (operands[1], <MODE>mode))"
 943 {
 944   return rs6000_output_move_128bit (operands);
 945 }
 946   [(set_attr "type"
 947                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
 948                 store,     load,      store,     *,         vecsimple, vecsimple,
 949                 vecsimple, *,         *,         *,         vecstore,  vecload")
 950
 951    (set_attr "length"
 952                "4,         4,         4,         8,         4,         8,
 953                 8,         8,         8,         8,         4,         4,
 954                 4,         8,         20,        20,        4,         4")])
 955
 956 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
 957 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
 958 ;;              STVX (VMX) LVX (VMX)
 959 (define_insn "*vsx_mov<mode>_32bit"
     ;; 128-bit move for VSX_M modes when TARGET_POWERPC64 is not set.  At
     ;; least one of the two operands must be a register.  There are 14
     ;; alternatives; the "type" and "length" attribute lists below give one
     ;; entry per alternative, in the same order as the constraint lists.
     ;; The assembly text is produced by rs6000_output_move_128bit.
 960   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
 961                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       ??r,
 962                 wo,        v,         ?<VSa>,    *r,        v,         ??r,
 963                 wZ,        v")
 964
 965         (match_operand:VSX_M 1 "input_operand"
 966                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
 967                 wE,        jwM,       ?jwM,      jwM,       W,         W,
 968                 v,         wZ"))]
 969
 970   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
 971    && (register_operand (operands[0], <MODE>mode)
 972        || register_operand (operands[1], <MODE>mode))"
 973 {
 974   return rs6000_output_move_128bit (operands);
 975 }
 976   [(set_attr "type"
 977                "vecstore,  vecload,   vecsimple, load,      store,    *,
 978                 vecsimple, vecsimple, vecsimple, *,         *,        *,
 979                 vecstore,  vecload")
 980
 981    (set_attr "length"
 982                "4,         4,         4,         16,        16,        16,
 983                 4,         4,         4,         16,        20,        32,
 984                 4,         4")])
 985
 986 ;; Explicit load/store expanders for the builtin functions
 987 (define_expand "vsx_load_<mode>"
     ;; Expander for loading a VSX_M vector from memory into a VSX register.
     ;; On little endian without TARGET_P9_VECTOR the move is emitted through
     ;; rs6000_emit_le_vsx_move and the expander finishes with DONE;
     ;; otherwise the plain set in the pattern is used.
 988   [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
 989         (match_operand:VSX_M 1 "memory_operand" ""))]
 990   "VECTOR_MEM_VSX_P (<MODE>mode)"
 991 {
 992   /* Expand to swaps if needed, prior to swap optimization.  */
 993   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
 994     {
 995       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
 996       DONE;
 997     }
 998 })
 999
1000 (define_expand "vsx_store_<mode>"
     ;; Expander for storing a VSX_M vector register to memory.  On little
     ;; endian without TARGET_P9_VECTOR the move is emitted through
     ;; rs6000_emit_le_vsx_move and the expander finishes with DONE;
     ;; otherwise the plain set in the pattern is used.
1001   [(set (match_operand:VSX_M 0 "memory_operand" "")
1002         (match_operand:VSX_M 1 "vsx_register_operand" ""))]
1003   "VECTOR_MEM_VSX_P (<MODE>mode)"
1004 {
1005   /* Expand to swaps if needed, prior to swap optimization.  */
1006   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1007     {
1008       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1009       DONE;
1010     }
1011 })
1012
1013 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1014 ;; when you really want their element-reversing behavior.
1015 (define_insn "vsx_ld_elemrev_v2di"
     ;; Little endian only: load a V2DI vector from memory with its two
     ;; elements selected in reverse order (1, 0), using lxvd2x.
1016   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1017         (vec_select:V2DI
1018           (match_operand:V2DI 1 "memory_operand" "Z")
1019           (parallel [(const_int 1) (const_int 0)])))]
1020   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1021   "lxvd2x %x0,%y1"
1022   [(set_attr "type" "vecload")])
1023
1024 (define_insn "vsx_ld_elemrev_v2df"
     ;; Little endian only: load a V2DF vector from memory with its two
     ;; elements selected in reverse order (1, 0), using lxvd2x.
1025   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1026         (vec_select:V2DF
1027           (match_operand:V2DF 1 "memory_operand" "Z")
1028           (parallel [(const_int 1) (const_int 0)])))]
1029   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1030   "lxvd2x %x0,%y1"
1031   [(set_attr "type" "vecload")])
1032
1033 (define_insn "vsx_ld_elemrev_v4si"
     ;; Little endian only: load a V4SI vector from memory with its four
     ;; elements selected in reverse order (3, 2, 1, 0), using lxvw4x.
1034   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1035         (vec_select:V4SI
1036           (match_operand:V4SI 1 "memory_operand" "Z")
1037           (parallel [(const_int 3) (const_int 2)
1038                      (const_int 1) (const_int 0)])))]
1039   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1040   "lxvw4x %x0,%y1"
1041   [(set_attr "type" "vecload")])
1042
1043 (define_insn "vsx_ld_elemrev_v4sf"
     ;; Little endian only: load a V4SF vector from memory with its four
     ;; elements selected in reverse order (3, 2, 1, 0), using lxvw4x.
1044   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1045         (vec_select:V4SF
1046           (match_operand:V4SF 1 "memory_operand" "Z")
1047           (parallel [(const_int 3) (const_int 2)
1048                      (const_int 1) (const_int 0)])))]
1049   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1050   "lxvw4x %x0,%y1"
1051   [(set_attr "type" "vecload")])
1052
1053 (define_insn "vsx_ld_elemrev_v8hi"
     ;; Little endian with TARGET_P9_VECTOR only: load a V8HI vector from
     ;; memory with its eight elements selected in reverse order (7 down to
     ;; 0), using lxvh8x.
1054   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1055         (vec_select:V8HI
1056           (match_operand:V8HI 1 "memory_operand" "Z")
1057           (parallel [(const_int 7) (const_int 6)
1058                      (const_int 5) (const_int 4)
1059                      (const_int 3) (const_int 2)
1060                      (const_int 1) (const_int 0)])))]
1061   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1062   "lxvh8x %x0,%y1"
1063   [(set_attr "type" "vecload")])
1064
1065 (define_insn "vsx_ld_elemrev_v16qi"
     ;; Little endian with TARGET_P9_VECTOR only: load a V16QI vector from
     ;; memory with its sixteen elements selected in reverse order (15 down
     ;; to 0), using lxvb16x.
1066   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1067         (vec_select:V16QI
1068           (match_operand:V16QI 1 "memory_operand" "Z")
1069           (parallel [(const_int 15) (const_int 14)
1070                      (const_int 13) (const_int 12)
1071                      (const_int 11) (const_int 10)
1072                      (const_int  9) (const_int  8)
1073                      (const_int  7) (const_int  6)
1074                      (const_int  5) (const_int  4)
1075                      (const_int  3) (const_int  2)
1076                      (const_int  1) (const_int  0)])))]
1077   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1078   "lxvb16x %x0,%y1"
1079   [(set_attr "type" "vecload")])
1080
1081 (define_insn "vsx_st_elemrev_v2df"
     ;; Little endian only: store a V2DF register to memory with its two
     ;; elements selected in reverse order (1, 0), using stxvd2x.
1082   [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1083         (vec_select:V2DF
1084           (match_operand:V2DF 1 "vsx_register_operand" "wa")
1085           (parallel [(const_int 1) (const_int 0)])))]
1086   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1087   "stxvd2x %x1,%y0"
1088   [(set_attr "type" "vecstore")])
1089
1090 (define_insn "vsx_st_elemrev_v2di"
     ;; Little endian only: store a V2DI register to memory with its two
     ;; elements selected in reverse order (1, 0), using stxvd2x.
1091   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1092         (vec_select:V2DI
1093           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1094           (parallel [(const_int 1) (const_int 0)])))]
1095   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1096   "stxvd2x %x1,%y0"
1097   [(set_attr "type" "vecstore")])
1098
1099 (define_insn "vsx_st_elemrev_v4sf"
     ;; Little endian only: store a V4SF register to memory with its four
     ;; elements selected in reverse order (3, 2, 1, 0), using stxvw4x.
1100   [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1101         (vec_select:V4SF
1102           (match_operand:V4SF 1 "vsx_register_operand" "wa")
1103           (parallel [(const_int 3) (const_int 2)
1104                      (const_int 1) (const_int 0)])))]
1105   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1106   "stxvw4x %x1,%y0"
1107   [(set_attr "type" "vecstore")])
1108
1109 (define_insn "vsx_st_elemrev_v4si"
     ;; Little endian only: store a V4SI register to memory with its four
     ;; elements selected in reverse order (3, 2, 1, 0), using stxvw4x.
1110   [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1111         (vec_select:V4SI
1112           (match_operand:V4SI 1 "vsx_register_operand" "wa")
1113           (parallel [(const_int 3) (const_int 2)
1114                      (const_int 1) (const_int 0)])))]
1115   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1116   "stxvw4x %x1,%y0"
1117   [(set_attr "type" "vecstore")])
1118
1119 (define_insn "vsx_st_elemrev_v8hi"
     ;; Little endian with TARGET_P9_VECTOR only: store a V8HI register to
     ;; memory with its eight elements selected in reverse order (7 down to
     ;; 0), using stxvh8x.
1120   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1121         (vec_select:V8HI
1122           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1123           (parallel [(const_int 7) (const_int 6)
1124                      (const_int 5) (const_int 4)
1125                      (const_int 3) (const_int 2)
1126                      (const_int 1) (const_int 0)])))]
1127   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1128   "stxvh8x %x1,%y0"
1129   [(set_attr "type" "vecstore")])
1130
1131 (define_insn "vsx_st_elemrev_v16qi"
     ;; Little endian with TARGET_P9_VECTOR only: store a V16QI register to
     ;; memory with its sixteen elements selected in reverse order (15 down
     ;; to 0), using stxvb16x.
1132   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1133         (vec_select:V16QI
1134           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1135           (parallel [(const_int 15) (const_int 14)
1136                      (const_int 13) (const_int 12)
1137                      (const_int 11) (const_int 10)
1138                      (const_int  9) (const_int  8)
1139                      (const_int  7) (const_int  6)
1140                      (const_int  5) (const_int  4)
1141                      (const_int  3) (const_int  2)
1142                      (const_int  1) (const_int  0)])))]
1143   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1144   "stxvb16x %x1,%y0"
1145   [(set_attr "type" "vecstore")])
1146
1147 \f
1148 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1149 ;; instructions are now combined with the insn for the traditional floating
1150 ;; point unit.
1151 (define_insn "*vsx_add<mode>3"
     ;; Vector floating point add for the VSX_F modes (V4SF, V2DF), emitted
     ;; as xvadd<VSs>.  The first alternative uses the <VSr> register class;
     ;; the second, marked "?", uses <VSa>.
1152   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1153         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1154                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1155   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1156   "xvadd<VSs> %x0,%x1,%x2"
1157   [(set_attr "type" "<VStype_simple>")
1158    (set_attr "fp_type" "<VSfptype_simple>")])
1159
1160 (define_insn "*vsx_sub<mode>3"
     ;; Vector floating point subtract (operand 1 minus operand 2) for the
     ;; VSX_F modes (V4SF, V2DF), emitted as xvsub<VSs>.
1161   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1162         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1163                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1164   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1165   "xvsub<VSs> %x0,%x1,%x2"
1166   [(set_attr "type" "<VStype_simple>")
1167    (set_attr "fp_type" "<VSfptype_simple>")])
1168
1169 (define_insn "*vsx_mul<mode>3"
     ;; Vector floating point multiply for the VSX_F modes (V4SF, V2DF),
     ;; emitted as xvmul<VSs>.  Uses the multiply-specific fp_type attribute.
1170   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1171         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1172                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1173   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1174   "xvmul<VSs> %x0,%x1,%x2"
1175   [(set_attr "type" "<VStype_simple>")
1176    (set_attr "fp_type" "<VSfptype_mul>")])
1177
1178 ; Emulate vector with scalar for vec_mul in V2DImode
1179 (define_insn_and_split "vsx_mul_v2di"
     ;; V2DI multiply represented as an unspec (UNSPEC_VSX_MULSD) and always
     ;; split, only while reload has neither started nor completed.  The
     ;; split extracts element 0 of both inputs into DImode pseudos and
     ;; multiplies them with muldi3, does the same for element 1, then
     ;; rebuilds the vector with vsx_concat_v2di (element 0 product first).
1180   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1181         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1182                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1183                      UNSPEC_VSX_MULSD))]
1184   "VECTOR_MEM_VSX_P (V2DImode)"
1185   "#"
1186   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1187   [(const_int 0)]
1188   "
1189 {
1190   rtx op0 = operands[0];
1191   rtx op1 = operands[1];
1192   rtx op2 = operands[2];
1193   rtx op3 = gen_reg_rtx (DImode);
1194   rtx op4 = gen_reg_rtx (DImode);
1195   rtx op5 = gen_reg_rtx (DImode);
1196   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1197   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1198   emit_insn (gen_muldi3 (op5, op3, op4));
1199   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1200   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1201   emit_insn (gen_muldi3 (op3, op3, op4));
1202   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1203   DONE;
1204 }"
1205   [(set_attr "type" "mul")])
1206
1207 (define_insn "*vsx_div<mode>3"
     ;; Vector floating point divide (operand 1 divided by operand 2) for the
     ;; VSX_F modes (V4SF, V2DF), emitted as xvdiv<VSs>.
1208   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1209         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1210                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1211   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1212   "xvdiv<VSs> %x0,%x1,%x2"
1213   [(set_attr "type" "<VStype_div>")
1214    (set_attr "fp_type" "<VSfptype_div>")])
1215
1216 ; Emulate vector with scalar for vec_div in V2DImode
1217 (define_insn_and_split "vsx_div_v2di"
     ;; V2DI divide represented as an unspec (UNSPEC_VSX_DIVSD) and always
     ;; split, only while reload has neither started nor completed.  The
     ;; split extracts element 0 of both inputs into DImode pseudos and
     ;; divides them with divdi3, does the same for element 1, then rebuilds
     ;; the vector with vsx_concat_v2di (element 0 quotient first).
1218   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1219         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1220                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1221                      UNSPEC_VSX_DIVSD))]
1222   "VECTOR_MEM_VSX_P (V2DImode)"
1223   "#"
1224   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1225   [(const_int 0)]
1226   "
1227 {
1228   rtx op0 = operands[0];
1229   rtx op1 = operands[1];
1230   rtx op2 = operands[2];
1231   rtx op3 = gen_reg_rtx (DImode);
1232   rtx op4 = gen_reg_rtx (DImode);
1233   rtx op5 = gen_reg_rtx (DImode);
1234   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1235   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1236   emit_insn (gen_divdi3 (op5, op3, op4));
1237   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1238   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1239   emit_insn (gen_divdi3 (op3, op3, op4));
1240   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1241   DONE;
1242 }"
1243   [(set_attr "type" "div")])
1244
1245 (define_insn_and_split "vsx_udiv_v2di"
     ;; V2DI divide represented as an unspec (UNSPEC_VSX_DIVUD) and always
     ;; split, only while reload has neither started nor completed.  The
     ;; split has the same shape as vsx_div_v2di but performs the two scalar
     ;; DImode divides with udivdi3 instead of divdi3.
1246   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1247         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1248                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1249                      UNSPEC_VSX_DIVUD))]
1250   "VECTOR_MEM_VSX_P (V2DImode)"
1251   "#"
1252   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1253   [(const_int 0)]
1254   "
1255 {
1256   rtx op0 = operands[0];
1257   rtx op1 = operands[1];
1258   rtx op2 = operands[2];
1259   rtx op3 = gen_reg_rtx (DImode);
1260   rtx op4 = gen_reg_rtx (DImode);
1261   rtx op5 = gen_reg_rtx (DImode);
1262   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1263   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1264   emit_insn (gen_udivdi3 (op5, op3, op4));
1265   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1266   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1267   emit_insn (gen_udivdi3 (op3, op3, op4));
1268   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1269   DONE;
1270 }"
1271   [(set_attr "type" "div")])
1272
1273 ;; *tdiv* instruction returning the FG flag
1274 (define_expand "vsx_tdiv<mode>3_fg"
     ;; Expands to two sets: a new CCFP pseudo (operand 3) receives the
     ;; UNSPEC_VSX_TDIV of operands 1 and 2, and SImode operand 0 is then set
     ;; from the "gt" comparison of that pseudo against zero.  Available for
     ;; the VSX_B modes (DF, V4SF, V2DF).
1275   [(set (match_dup 3)
1276         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1277                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1278                      UNSPEC_VSX_TDIV))
1279    (set (match_operand:SI 0 "gpc_reg_operand" "")
1280         (gt:SI (match_dup 3)
1281                (const_int 0)))]
1282   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1283 {
1284   operands[3] = gen_reg_rtx (CCFPmode);
1285 })
1286
1287 ;; *tdiv* instruction returning the FE flag
1288 (define_expand "vsx_tdiv<mode>3_fe"
     ;; Expands to two sets: a new CCFP pseudo (operand 3) receives the
     ;; UNSPEC_VSX_TDIV of operands 1 and 2, and SImode operand 0 is then set
     ;; from the "eq" comparison of that pseudo against zero.  Available for
     ;; the VSX_B modes (DF, V4SF, V2DF).
1289   [(set (match_dup 3)
1290         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1291                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1292                      UNSPEC_VSX_TDIV))
1293    (set (match_operand:SI 0 "gpc_reg_operand" "")
1294         (eq:SI (match_dup 3)
1295                (const_int 0)))]
1296   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1297 {
1298   operands[3] = gen_reg_rtx (CCFPmode);
1299 })
1300
1301 (define_insn "*vsx_tdiv<mode>3_internal"
     ;; The insn that sets a CCFP condition register from UNSPEC_VSX_TDIV of
     ;; two VSX_B operands; this is the first set produced by the
     ;; vsx_tdiv<mode>3_fg and vsx_tdiv<mode>3_fe expanders.  Emitted as
     ;; x<VSv>tdiv<VSs>.
1302   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1303         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1304                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1305                    UNSPEC_VSX_TDIV))]
1306   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1307   "x<VSv>tdiv<VSs> %0,%x1,%x2"
1308   [(set_attr "type" "<VStype_simple>")
1309    (set_attr "fp_type" "<VSfptype_simple>")])
1310
1311 (define_insn "vsx_fre<mode>2"
     ;; UNSPEC_FRES of one VSX_F vector (V4SF, V2DF), emitted as xvre<VSs>.
     ;; NOTE(review): presumably the reciprocal estimate instruction --
     ;; confirm against the ISA description.
1312   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1313         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1314                       UNSPEC_FRES))]
1315   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1316   "xvre<VSs> %x0,%x1"
1317   [(set_attr "type" "<VStype_simple>")
1318    (set_attr "fp_type" "<VSfptype_simple>")])
1319
1320 (define_insn "*vsx_neg<mode>2"
     ;; Vector floating point negate for the VSX_F modes (V4SF, V2DF),
     ;; emitted as xvneg<VSs>.
1321   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1322         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1323   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1324   "xvneg<VSs> %x0,%x1"
1325   [(set_attr "type" "<VStype_simple>")
1326    (set_attr "fp_type" "<VSfptype_simple>")])
1327
1328 (define_insn "*vsx_abs<mode>2"
     ;; Vector floating point absolute value for the VSX_F modes (V4SF,
     ;; V2DF), emitted as xvabs<VSs>.
1329   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1330         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1331   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1332   "xvabs<VSs> %x0,%x1"
1333   [(set_attr "type" "<VStype_simple>")
1334    (set_attr "fp_type" "<VSfptype_simple>")])
1335
1336 (define_insn "vsx_nabs<mode>2"
     ;; Negated absolute value, (neg (abs x)), for the VSX_F modes (V4SF,
     ;; V2DF), emitted as the single instruction xvnabs<VSs>.
1337   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1338         (neg:VSX_F
1339          (abs:VSX_F
1340           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1341   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1342   "xvnabs<VSs> %x0,%x1"
1343   [(set_attr "type" "<VStype_simple>")
1344    (set_attr "fp_type" "<VSfptype_simple>")])
1345
1346 (define_insn "vsx_smax<mode>3"
     ;; Vector floating point maximum (smax) for the VSX_F modes (V4SF,
     ;; V2DF), emitted as xvmax<VSs>.
1347   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1348         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1349                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1350   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1351   "xvmax<VSs> %x0,%x1,%x2"
1352   [(set_attr "type" "<VStype_simple>")
1353    (set_attr "fp_type" "<VSfptype_simple>")])
1354
1355 (define_insn "*vsx_smin<mode>3"
     ;; Vector floating point minimum (smin) for the VSX_F modes (V4SF,
     ;; V2DF), emitted as xvmin<VSs>.
1356   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1357         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1358                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1359   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1360   "xvmin<VSs> %x0,%x1,%x2"
1361   [(set_attr "type" "<VStype_simple>")
1362    (set_attr "fp_type" "<VSfptype_simple>")])
1363
1364 (define_insn "*vsx_sqrt<mode>2"
     ;; Vector floating point square root for the VSX_F modes (V4SF, V2DF),
     ;; emitted as xvsqrt<VSs>.  Uses the sqrt-specific type and fp_type
     ;; attributes.
1365   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1366         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1367   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1368   "xvsqrt<VSs> %x0,%x1"
1369   [(set_attr "type" "<VStype_sqrt>")
1370    (set_attr "fp_type" "<VSfptype_sqrt>")])
1371
1372 (define_insn "*vsx_rsqrte<mode>2"
     ;; UNSPEC_RSQRT of one VSX_F vector (V4SF, V2DF), emitted as
     ;; xvrsqrte<VSs>.  NOTE(review): presumably the reciprocal square root
     ;; estimate instruction -- confirm against the ISA description.
1373   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1374         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1375                       UNSPEC_RSQRT))]
1376   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1377   "xvrsqrte<VSs> %x0,%x1"
1378   [(set_attr "type" "<VStype_simple>")
1379    (set_attr "fp_type" "<VSfptype_simple>")])
1380
1381 ;; *tsqrt* returning the fg flag
1382 (define_expand "vsx_tsqrt<mode>2_fg"
     ;; Expands to two sets: a new CCFP pseudo (operand 2) receives the
     ;; UNSPEC_VSX_TSQRT of operand 1, and SImode operand 0 is then set from
     ;; the "gt" comparison of that pseudo against zero.  Available for the
     ;; VSX_B modes (DF, V4SF, V2DF).
1383   [(set (match_dup 2)
1384         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1385                      UNSPEC_VSX_TSQRT))
1386    (set (match_operand:SI 0 "gpc_reg_operand" "")
1387         (gt:SI (match_dup 2)
1388                (const_int 0)))]
1389   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1390 {
1391   operands[2] = gen_reg_rtx (CCFPmode);
1392 })
1393
1394 ;; *tsqrt* returning the fe flag
1395 (define_expand "vsx_tsqrt<mode>2_fe"
     ;; Expands to two sets: a new CCFP pseudo (operand 2) receives the
     ;; UNSPEC_VSX_TSQRT of operand 1, and SImode operand 0 is then set from
     ;; the "eq" comparison of that pseudo against zero.  Available for the
     ;; VSX_B modes (DF, V4SF, V2DF).
1396   [(set (match_dup 2)
1397         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1398                      UNSPEC_VSX_TSQRT))
1399    (set (match_operand:SI 0 "gpc_reg_operand" "")
1400         (eq:SI (match_dup 2)
1401                (const_int 0)))]
1402   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1403 {
1404   operands[2] = gen_reg_rtx (CCFPmode);
1405 })
1406
1407 (define_insn "*vsx_tsqrt<mode>2_internal"
     ;; The insn that sets a CCFP condition register from UNSPEC_VSX_TSQRT of
     ;; one VSX_B operand; this is the first set produced by the
     ;; vsx_tsqrt<mode>2_fg and vsx_tsqrt<mode>2_fe expanders.  Emitted as
     ;; x<VSv>tsqrt<VSs>.
1408   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1409         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1410                      UNSPEC_VSX_TSQRT))]
1411   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1412   "x<VSv>tsqrt<VSs> %0,%x1"
1413   [(set_attr "type" "<VStype_simple>")
1414    (set_attr "fp_type" "<VSfptype_simple>")])
1415
1416 ;; Fused vector multiply/add instructions. Support the classical Altivec
1417 ;; versions of fma, which allows the target to be a separate register from the
1418 ;; 3 inputs.  Under VSX, the target must be either the addend or the first
1419 ;; multiply.
1420
1421 (define_insn "*vsx_fmav4sf4"
     ;; V4SF fused multiply-add: operand 1 * operand 2 + operand 3.
     ;; Alternatives 1 and 3 tie the addend (operand 3) to the destination
     ;; and use xvmaddasp; alternatives 2 and 4 tie operand 2 to the
     ;; destination and use xvmaddmsp.  The fifth alternative uses "v"
     ;; registers for all four operands with no tie and emits vmaddfp.
1422   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1423         (fma:V4SF
1424           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1425           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1426           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1427   "VECTOR_UNIT_VSX_P (V4SFmode)"
1428   "@
1429    xvmaddasp %x0,%x1,%x2
1430    xvmaddmsp %x0,%x1,%x3
1431    xvmaddasp %x0,%x1,%x2
1432    xvmaddmsp %x0,%x1,%x3
1433    vmaddfp %0,%1,%2,%3"
1434   [(set_attr "type" "vecfloat")])
1435
1436 (define_insn "*vsx_fmav2df4"
     ;; V2DF fused multiply-add: operand 1 * operand 2 + operand 3.
     ;; Alternatives 1 and 3 tie the addend (operand 3) to the destination
     ;; and use xvmaddadp; alternatives 2 and 4 tie operand 2 to the
     ;; destination and use xvmaddmdp.
1437   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1438         (fma:V2DF
1439           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1440           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1441           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1442   "VECTOR_UNIT_VSX_P (V2DFmode)"
1443   "@
1444    xvmaddadp %x0,%x1,%x2
1445    xvmaddmdp %x0,%x1,%x3
1446    xvmaddadp %x0,%x1,%x2
1447    xvmaddmdp %x0,%x1,%x3"
1448   [(set_attr "type" "vecdouble")])
1449
1450 (define_insn "*vsx_fms<mode>4"
     ;; Fused multiply-subtract for the VSX_F modes: operand 1 * operand 2 -
     ;; operand 3, written as an fma with a negated addend.  Alternatives
     ;; that tie operand 3 to the destination use xvmsuba<VSs>; those that
     ;; tie operand 2 use xvmsubm<VSs>.
     ;; NOTE(review): only the "type" attribute is set here, whereas
     ;; *vsx_nfma<mode>4 also sets "fp_type" -- confirm this is intentional.
1451   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1452         (fma:VSX_F
1453           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1454           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1455           (neg:VSX_F
1456             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1457   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1458   "@
1459    xvmsuba<VSs> %x0,%x1,%x2
1460    xvmsubm<VSs> %x0,%x1,%x3
1461    xvmsuba<VSs> %x0,%x1,%x2
1462    xvmsubm<VSs> %x0,%x1,%x3"
1463   [(set_attr "type" "<VStype_mul>")])
1464
1465 (define_insn "*vsx_nfma<mode>4"
     ;; Negated fused multiply-add for the VSX_F modes:
     ;; -(operand 1 * operand 2 + operand 3).  Alternatives that tie operand
     ;; 3 to the destination use xvnmadda<VSs>; those that tie operand 2 use
     ;; xvnmaddm<VSs>.
     ;; NOTE(review): operand 1 has no "%" commutative marker here, unlike
     ;; the other fma patterns in this group -- confirm this is intentional.
1466   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1467         (neg:VSX_F
1468          (fma:VSX_F
1469           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1470           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1471           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1472   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1473   "@
1474    xvnmadda<VSs> %x0,%x1,%x2
1475    xvnmaddm<VSs> %x0,%x1,%x3
1476    xvnmadda<VSs> %x0,%x1,%x2
1477    xvnmaddm<VSs> %x0,%x1,%x3"
1478   [(set_attr "type" "<VStype_mul>")
1479    (set_attr "fp_type" "<VSfptype_mul>")])
1480
1481 (define_insn "*vsx_nfmsv4sf4"
     ;; V4SF negated fused multiply-subtract:
     ;; -(operand 1 * operand 2 - operand 3).  Alternatives 1 and 3 tie
     ;; operand 3 to the destination and use xvnmsubasp; alternatives 2 and
     ;; 4 tie operand 2 and use xvnmsubmsp.  The fifth alternative uses "v"
     ;; registers for all four operands with no tie and emits vnmsubfp.
1482   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1483         (neg:V4SF
1484          (fma:V4SF
1485            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1486            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1487            (neg:V4SF
1488              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1489   "VECTOR_UNIT_VSX_P (V4SFmode)"
1490   "@
1491    xvnmsubasp %x0,%x1,%x2
1492    xvnmsubmsp %x0,%x1,%x3
1493    xvnmsubasp %x0,%x1,%x2
1494    xvnmsubmsp %x0,%x1,%x3
1495    vnmsubfp %0,%1,%2,%3"
1496   [(set_attr "type" "vecfloat")])
1497
1498 (define_insn "*vsx_nfmsv2df4"
     ;; V2DF negated fused multiply-subtract:
     ;; -(operand 1 * operand 2 - operand 3).  Alternatives 1 and 3 tie
     ;; operand 3 to the destination and use xvnmsubadp; alternatives 2 and
     ;; 4 tie operand 2 and use xvnmsubmdp.
1499   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1500         (neg:V2DF
1501          (fma:V2DF
1502            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1503            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1504            (neg:V2DF
1505              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1506   "VECTOR_UNIT_VSX_P (V2DFmode)"
1507   "@
1508    xvnmsubadp %x0,%x1,%x2
1509    xvnmsubmdp %x0,%x1,%x3
1510    xvnmsubadp %x0,%x1,%x2
1511    xvnmsubmdp %x0,%x1,%x3"
1512   [(set_attr "type" "vecdouble")])
1513
1514 ;; Vector conditional expressions (no scalar version for these instructions)
;; vsx_eq<mode>: (eq op1 op2) producing a VSX_F-mode result in operand 0;
;; emits xvcmpeq with the mode-specific <VSs> suffix.
1515 (define_insn "vsx_eq<mode>"
1516   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1517         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1518                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1519   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1520   "xvcmpeq<VSs> %x0,%x1,%x2"
1521   [(set_attr "type" "<VStype_simple>")
1522    (set_attr "fp_type" "<VSfptype_simple>")])
1523
;; vsx_gt<mode>: (gt op1 op2) producing a VSX_F-mode result in operand 0;
;; emits xvcmpgt with the mode-specific <VSs> suffix.
1524 (define_insn "vsx_gt<mode>"
1525   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1526         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1527                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1528   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1529   "xvcmpgt<VSs> %x0,%x1,%x2"
1530   [(set_attr "type" "<VStype_simple>")
1531    (set_attr "fp_type" "<VSfptype_simple>")])
1532
;; *vsx_ge<mode>: (ge op1 op2) producing a VSX_F-mode result in operand 0;
;; emits xvcmpge with the mode-specific <VSs> suffix.  Unlike the eq and gt
;; patterns, the leading "*" means no gen_* function is generated for it.
1533 (define_insn "*vsx_ge<mode>"
1534   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1535         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1536                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1537   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1538   "xvcmpge<VSs> %x0,%x1,%x2"
1539   [(set_attr "type" "<VStype_simple>")
1540    (set_attr "fp_type" "<VSfptype_simple>")])
1541
1542 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1543 ;; indicate a combined status
;; Equality variant.  The pattern is a pair of sets: the first writes CR6
;; through UNSPEC_PREDICATE, the second writes the (eq op1 op2) vector result
;; to operand 0 using match_dup of the same inputs.  The emitted mnemonic is
;; xvcmpeq<VSs> with a trailing ".".
1544 (define_insn "*vsx_eq_<mode>_p"
1545   [(set (reg:CC CR6_REGNO)
1546         (unspec:CC
1547          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1548                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1549          UNSPEC_PREDICATE))
1550    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1551         (eq:VSX_F (match_dup 1)
1552                   (match_dup 2)))]
1553   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1554   "xvcmpeq<VSs>. %x0,%x1,%x2"
1555   [(set_attr "type" "<VStype_simple>")])
1556
;; Greater-than compare that sets both CR6 and a vector result.  The first set
;; writes CR6 through UNSPEC_PREDICATE; the second writes (gt op1 op2) to
;; operand 0.  The emitted mnemonic is xvcmpgt<VSs> with a trailing ".".
1557 (define_insn "*vsx_gt_<mode>_p"
1558   [(set (reg:CC CR6_REGNO)
1559         (unspec:CC
1560          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1561                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1562          UNSPEC_PREDICATE))
1563    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1564         (gt:VSX_F (match_dup 1)
1565                   (match_dup 2)))]
1566   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1567   "xvcmpgt<VSs>. %x0,%x1,%x2"
1568   [(set_attr "type" "<VStype_simple>")])
1569
;; Greater-or-equal compare that sets both CR6 and a vector result.  The first
;; set writes CR6 through UNSPEC_PREDICATE; the second writes (ge op1 op2) to
;; operand 0.  The emitted mnemonic is xvcmpge<VSs> with a trailing ".".
1570 (define_insn "*vsx_ge_<mode>_p"
1571   [(set (reg:CC CR6_REGNO)
1572         (unspec:CC
1573          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1574                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1575          UNSPEC_PREDICATE))
1576    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1577         (ge:VSX_F (match_dup 1)
1578                   (match_dup 2)))]
1579   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1580   "xvcmpge<VSs>. %x0,%x1,%x2"
1581   [(set_attr "type" "<VStype_simple>")])
1582
1583 ;; Vector select
;; Operand 0 is chosen between operand 2 and operand 3 under control of
;; (ne:CC operand 1, operand 4), where operand 4 must satisfy zero_constant.
;; Note that the xxsel template lists operand 3 before operand 2 and passes
;; the mask (operand 1) last.
1584 (define_insn "*vsx_xxsel<mode>"
1585   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1586         (if_then_else:VSX_L
1587          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1588                 (match_operand:VSX_L 4 "zero_constant" ""))
1589          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1590          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1591   "VECTOR_MEM_VSX_P (<MODE>mode)"
1592   "xxsel %x0,%x3,%x2,%x1"
1593   [(set_attr "type" "vecmove")])
1594
;; Vector select whose controlling comparison is expressed in CCUNS mode
;; instead of CC; otherwise the operands, condition and xxsel template are the
;; same as in *vsx_xxsel<mode>.
1595 (define_insn "*vsx_xxsel<mode>_uns"
1596   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1597         (if_then_else:VSX_L
1598          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1599                    (match_operand:VSX_L 4 "zero_constant" ""))
1600          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1601          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1602   "VECTOR_MEM_VSX_P (<MODE>mode)"
1603   "xxsel %x0,%x3,%x2,%x1"
1604   [(set_attr "type" "vecmove")])
1605
1606 ;; Copy sign
;; Represented as UNSPEC_COPYSIGN of operands 1 and 2.  The xvcpsgn template
;; passes operand 2 before operand 1.
;; NOTE(review): presumably operand 1 supplies the magnitude and operand 2 the
;; sign, as for the standard copysign<mode>3 name -- confirm against callers.
1607 (define_insn "vsx_copysign<mode>3"
1608   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1609         (unspec:VSX_F
1610          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1611           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1612          UNSPEC_COPYSIGN))]
1613   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1614   "xvcpsgn<VSs> %x0,%x2,%x1"
1615   [(set_attr "type" "<VStype_simple>")
1616    (set_attr "fp_type" "<VSfptype_simple>")])
1617
1618 ;; For the conversions, limit the register class for the integer value to be
1619 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1620 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1621 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1622 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1623 ;; in allowing virtual registers.
;; vsx_float<VSi><mode>2: signed integer vector (mode <VSI>) to VSX_F
;; floating-point vector, expressed with the RTL "float" code.
1624 (define_insn "vsx_float<VSi><mode>2"
1625   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1626         (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1627   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1628   "xvcvsx<VSc><VSs> %x0,%x1"
1629   [(set_attr "type" "<VStype_simple>")
1630    (set_attr "fp_type" "<VSfptype_simple>")])
1631
;; Unsigned integer vector (mode <VSI>) to VSX_F floating-point vector,
;; expressed with the RTL "unsigned_float" code; emits the xvcvux... form.
1632 (define_insn "vsx_floatuns<VSi><mode>2"
1633   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1634         (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1635   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1636   "xvcvux<VSc><VSs> %x0,%x1"
1637   [(set_attr "type" "<VStype_simple>")
1638    (set_attr "fp_type" "<VSfptype_simple>")])
1639
;; VSX_F floating-point vector to signed integer vector (mode <VSI>),
;; expressed with the RTL "fix" code.  The mnemonic is assembled from the
;; <VSv>, <VSs> and <VSc> mode attributes around the "sx" (signed) infix.
1640 (define_insn "vsx_fix_trunc<mode><VSi>2"
1641   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1642         (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1643   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1644   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1645   [(set_attr "type" "<VStype_simple>")
1646    (set_attr "fp_type" "<VSfptype_simple>")])
1647
;; VSX_F floating-point vector to unsigned integer vector (mode <VSI>),
;; expressed with the RTL "unsigned_fix" code.  The mnemonic is assembled from
;; the <VSv>, <VSs> and <VSc> mode attributes around the "ux" (unsigned) infix.
1648 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1649   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1650         (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1651   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1652   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1653   [(set_attr "type" "<VStype_simple>")
1654    (set_attr "fp_type" "<VSfptype_simple>")])
1655
1656 ;; Math rounding functions
;; UNSPEC_VSX_ROUND_I applied to one VSX_B operand (the iterator covers DF,
;; V4SF and V2DF).  The pattern name and the emitted mnemonic are the same
;; string, built from the <VSv> and <VSs> mode attributes.
1657 (define_insn "vsx_x<VSv>r<VSs>i"
1658   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1659         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1660                       UNSPEC_VSX_ROUND_I))]
1661   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1662   "x<VSv>r<VSs>i %x0,%x1"
1663   [(set_attr "type" "<VStype_simple>")
1664    (set_attr "fp_type" "<VSfptype_simple>")])
1665
;; UNSPEC_VSX_ROUND_IC applied to one VSX_B operand (DF, V4SF or V2DF); emits
;; the "ic"-suffixed rounding mnemonic built from <VSv> and <VSs>.
1666 (define_insn "vsx_x<VSv>r<VSs>ic"
1667   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1668         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1669                       UNSPEC_VSX_ROUND_IC))]
1670   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1671   "x<VSv>r<VSs>ic %x0,%x1"
1672   [(set_attr "type" "<VStype_simple>")
1673    (set_attr "fp_type" "<VSfptype_simple>")])
1674
;; Truncation that stays in floating point: the RTL is "fix" with a VSX_F
;; result mode (not an integer mode), and the template emits xvr<VSs>iz.
1675 (define_insn "vsx_btrunc<mode>2"
1676   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1677         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1678   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1679   "xvr<VSs>iz %x0,%x1"
1680   [(set_attr "type" "<VStype_simple>")
1681    (set_attr "fp_type" "<VSfptype_simple>")])
1682
;; UNSPEC_FRIZ form of the "iz" rounding instruction.  Unlike
;; vsx_btrunc<mode>2 it iterates over VSX_B (which also includes scalar DF)
;; and therefore builds the mnemonic with <VSv> as well as <VSs>.
1683 (define_insn "*vsx_b2trunc<mode>2"
1684   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1685         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1686                       UNSPEC_FRIZ))]
1687   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1688   "x<VSv>r<VSs>iz %x0,%x1"
1689   [(set_attr "type" "<VStype_simple>")
1690    (set_attr "fp_type" "<VSfptype_simple>")])
1691
;; Floor for the VSX_F vector modes, represented as UNSPEC_FRIM and emitted
;; as xvr<VSs>im.
1692 (define_insn "vsx_floor<mode>2"
1693   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1694         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1695                       UNSPEC_FRIM))]
1696   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1697   "xvr<VSs>im %x0,%x1"
1698   [(set_attr "type" "<VStype_simple>")
1699    (set_attr "fp_type" "<VSfptype_simple>")])
1700
;; Ceiling for the VSX_F vector modes, represented as UNSPEC_FRIP and emitted
;; as xvr<VSs>ip.
1701 (define_insn "vsx_ceil<mode>2"
1702   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1703         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1704                       UNSPEC_FRIP))]
1705   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1706   "xvr<VSs>ip %x0,%x1"
1707   [(set_attr "type" "<VStype_simple>")
1708    (set_attr "fp_type" "<VSfptype_simple>")])
1709
1710 \f
1711 ;; VSX convert to/from double vector
1712
1713 ;; Convert between single and double precision
1714 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1715 ;; scalar single precision instructions internally use the double format.
1716 ;; Prefer the altivec registers, since we likely will need to do a vperm
;; The pattern name, the emitted mnemonic and the "type" attribute all come
;; from mode attributes (<VS_spdp_insn>, <VS_spdp_type>) of the VSX_SPDP
;; iterator; the result mode is <VS_spdp_res>.
1717 (define_insn "vsx_<VS_spdp_insn>"
1718   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1719         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1720                               UNSPEC_VSX_CVSPDP))]
1721   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1722   "<VS_spdp_insn> %x0,%x1"
1723   [(set_attr "type" "<VS_spdp_type>")])
1724
1725 ;; xscvspdp, represent the scalar SF type as V4SF
;; Produces a DF result from a V4SF input via UNSPEC_VSX_CVSPDP.
1726 (define_insn "vsx_xscvspdp"
1727   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1728         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1729                    UNSPEC_VSX_CVSPDP))]
1730   "VECTOR_UNIT_VSX_P (V4SFmode)"
1731   "xscvspdp %x0,%x1"
1732   [(set_attr "type" "fp")])
1733
1734 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1735 ;; format of scalars is actually DF.
;; Takes an SF operand (constraint "f") and produces a V4SF result; it reuses
;; UNSPEC_VSX_CVSPDP rather than a dedicated unspec.
1736 (define_insn "vsx_xscvdpsp_scalar"
1737   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1738         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1739                      UNSPEC_VSX_CVSPDP))]
1740   "VECTOR_UNIT_VSX_P (V4SFmode)"
1741   "xscvdpsp %x0,%x1"
1742   [(set_attr "type" "fp")])
1743
1744 ;; Same as vsx_xscvspdp, but use SF as the type
;; The result operand is SF (constraint "ww") instead of DF; input and
;; emitted instruction are unchanged.
1745 (define_insn "vsx_xscvspdp_scalar2"
1746   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
1747         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1748                    UNSPEC_VSX_CVSPDP))]
1749   "VECTOR_UNIT_VSX_P (V4SFmode)"
1750   "xscvspdp %x0,%x1"
1751   [(set_attr "type" "fp")])
1752
1753 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
;; vsx_xscvdpspn: DF input to V4SF result via UNSPEC_VSX_CVDPSPN; enabled by
;; TARGET_XSCVDPSPN.
1754 (define_insn "vsx_xscvdpspn"
1755   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
1756         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1757                      UNSPEC_VSX_CVDPSPN))]
1758   "TARGET_XSCVDPSPN"
1759   "xscvdpspn %x0,%x1"
1760   [(set_attr "type" "fp")])
1761
;; vsx_xscvspdpn: V4SF input to DF result via UNSPEC_VSX_CVSPDPN; enabled by
;; TARGET_XSCVSPDPN.
1762 (define_insn "vsx_xscvspdpn"
1763   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
1764         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
1765                    UNSPEC_VSX_CVSPDPN))]
1766   "TARGET_XSCVSPDPN"
1767   "xscvspdpn %x0,%x1"
1768   [(set_attr "type" "fp")])
1769
;; Variant of vsx_xscvdpspn whose input operand is SF rather than DF; the
;; result is still V4SF and the same xscvdpspn instruction is emitted.
1770 (define_insn "vsx_xscvdpspn_scalar"
1771   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
1772         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
1773                      UNSPEC_VSX_CVDPSPN))]
1774   "TARGET_XSCVDPSPN"
1775   "xscvdpspn %x0,%x1"
1776   [(set_attr "type" "fp")])
1777
1778 ;; Used by direct move to move a SFmode value from GPR to VSX register
;; Both the input and the result are SF operands in "wa" registers; the
;; conversion is represented with UNSPEC_VSX_CVSPDPN and emits xscvspdpn.
1779 (define_insn "vsx_xscvspdpn_directmove"
1780   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1781         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1782                    UNSPEC_VSX_CVSPDPN))]
1783   "TARGET_XSCVSPDPN"
1784   "xscvspdpn %x0,%x1"
1785   [(set_attr "type" "fp")])
1786
1787 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
1788
;; Signed V2DI -> V2DF conversion with a scale factor.  Operand 0 is the V2DF
;; result, operand 1 the V2DI input and operand 2 an immediate scale.  The
;; expander first emits vsx_xvcvsxddp, then, when the scale is nonzero, calls
;; rs6000_scale_v2df on the result in place with the negated scale.
;; NOTE(review): presumably rs6000_scale_v2df multiplies by a power of two
;; given by its last argument -- confirm against its definition.
1789 (define_expand "vsx_xvcvsxddp_scale"
1790   [(match_operand:V2DF 0 "vsx_register_operand" "")
1791    (match_operand:V2DI 1 "vsx_register_operand" "")
1792    (match_operand:QI 2 "immediate_operand" "")]
1793   "VECTOR_UNIT_VSX_P (V2DFmode)"
1794 {
1795   rtx op0 = operands[0];
1796   rtx op1 = operands[1];
1797   int scale = INTVAL(operands[2]);
1798   emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1799   if (scale != 0)
1800     rs6000_scale_v2df (op0, op0, -scale);
1801   DONE;
1802 })
1803
;; V2DI input to V2DF result, represented as UNSPEC_VSX_XVCVSXDDP and emitted
;; as xvcvsxddp.  Used by the vsx_xvcvsxddp_scale expander.
1804 (define_insn "vsx_xvcvsxddp"
1805   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1806         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1807                      UNSPEC_VSX_XVCVSXDDP))]
1808   "VECTOR_UNIT_VSX_P (V2DFmode)"
1809   "xvcvsxddp %x0,%x1"
1810   [(set_attr "type" "vecdouble")])
1811
;; Unsigned V2DI -> V2DF conversion with a scale factor.  Operand 0 is the
;; V2DF result, operand 1 the V2DI input and operand 2 an immediate scale.
;; The expander first emits vsx_xvcvuxddp, then, when the scale is nonzero,
;; calls rs6000_scale_v2df on the result in place with the negated scale.
;; NOTE(review): presumably rs6000_scale_v2df multiplies by a power of two
;; given by its last argument -- confirm against its definition.
1812 (define_expand "vsx_xvcvuxddp_scale"
1813   [(match_operand:V2DF 0 "vsx_register_operand" "")
1814    (match_operand:V2DI 1 "vsx_register_operand" "")
1815    (match_operand:QI 2 "immediate_operand" "")]
1816   "VECTOR_UNIT_VSX_P (V2DFmode)"
1817 {
1818   rtx op0 = operands[0];
1819   rtx op1 = operands[1];
1820   int scale = INTVAL(operands[2]);
1821   emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1822   if (scale != 0)
1823     rs6000_scale_v2df (op0, op0, -scale);
1824   DONE;
1825 })
1826
;; V2DI input to V2DF result, represented as UNSPEC_VSX_XVCVUXDDP and emitted
;; as xvcvuxddp.  Used by the vsx_xvcvuxddp_scale expander.
1827 (define_insn "vsx_xvcvuxddp"
1828   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1829         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1830                      UNSPEC_VSX_XVCVUXDDP))]
1831   "VECTOR_UNIT_VSX_P (V2DFmode)"
1832   "xvcvuxddp %x0,%x1"
1833   [(set_attr "type" "vecdouble")])
1834
;; V2DF -> signed V2DI conversion with a scale factor.  Operand 0 is the V2DI
;; result, operand 1 the V2DF input and operand 2 an immediate scale.  With a
;; zero scale the input is converted directly; otherwise rs6000_scale_v2df
;; writes a scaled copy into a fresh V2DF pseudo, which is then converted by
;; vsx_xvcvdpsxds.  Here the scale is applied before the conversion and is
;; not negated.
1835 (define_expand "vsx_xvcvdpsxds_scale"
1836   [(match_operand:V2DI 0 "vsx_register_operand" "")
1837    (match_operand:V2DF 1 "vsx_register_operand" "")
1838    (match_operand:QI 2 "immediate_operand" "")]
1839   "VECTOR_UNIT_VSX_P (V2DFmode)"
1840 {
1841   rtx op0 = operands[0];
1842   rtx op1 = operands[1];
1843   rtx tmp;
1844   int scale = INTVAL (operands[2]);
1845   if (scale == 0)
1846     tmp = op1;
1847   else
1848     {
1849       tmp  = gen_reg_rtx (V2DFmode);
1850       rs6000_scale_v2df (tmp, op1, scale);
1851     }
1852   emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
1853   DONE;
1854 })
1855
;; V2DF input to V2DI result, represented as UNSPEC_VSX_XVCVDPSXDS and emitted
;; as xvcvdpsxds.  Used by the vsx_xvcvdpsxds_scale expander.
1856 (define_insn "vsx_xvcvdpsxds"
1857   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1858         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1859                      UNSPEC_VSX_XVCVDPSXDS))]
1860   "VECTOR_UNIT_VSX_P (V2DFmode)"
1861   "xvcvdpsxds %x0,%x1"
1862   [(set_attr "type" "vecdouble")])
1863
;; V2DF -> unsigned V2DI conversion with a scale factor.  Operand 0 is the
;; V2DI result, operand 1 the V2DF input and operand 2 an immediate scale.
;; With a zero scale the input is converted directly; otherwise
;; rs6000_scale_v2df writes a scaled copy into a fresh V2DF pseudo, which is
;; then converted by vsx_xvcvdpuxds.
1864 (define_expand "vsx_xvcvdpuxds_scale"
1865   [(match_operand:V2DI 0 "vsx_register_operand" "")
1866    (match_operand:V2DF 1 "vsx_register_operand" "")
1867    (match_operand:QI 2 "immediate_operand" "")]
1868   "VECTOR_UNIT_VSX_P (V2DFmode)"
1869 {
1870   rtx op0 = operands[0];
1871   rtx op1 = operands[1];
1872   rtx tmp;
1873   int scale = INTVAL (operands[2]);
1874   if (scale == 0)
1875     tmp = op1;
1876   else
1877     {
1878       tmp = gen_reg_rtx (V2DFmode);
1879       rs6000_scale_v2df (tmp, op1, scale);
1880     }
1881   emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
1882   DONE;
1883 })
1884
;; V2DF input to V2DI result, represented as UNSPEC_VSX_XVCVDPUXDS and emitted
;; as xvcvdpuxds.  Used by the vsx_xvcvdpuxds_scale expander.
1885 (define_insn "vsx_xvcvdpuxds"
1886   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1887         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1888                      UNSPEC_VSX_XVCVDPUXDS))]
1889   "VECTOR_UNIT_VSX_P (V2DFmode)"
1890   "xvcvdpuxds %x0,%x1"
1891   [(set_attr "type" "vecdouble")])
1892
1893 ;; Convert from 64-bit to 32-bit types
1894 ;; Note, favor the Altivec registers since the usual use of these instructions
1895 ;; is in vector converts and we need to use the Altivec vperm instruction.
1896
;; V2DF input to V4SI result via UNSPEC_VSX_CVDPSXWS; emits xvcvdpsxws.  The
;; first alternative places the result in a "v" register, the second
;; (discouraged with "?") in any "wa" register.
1897 (define_insn "vsx_xvcvdpsxws"
1898   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1899         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1900                      UNSPEC_VSX_CVDPSXWS))]
1901   "VECTOR_UNIT_VSX_P (V2DFmode)"
1902   "xvcvdpsxws %x0,%x1"
1903   [(set_attr "type" "vecdouble")])
1904
;; V2DF input to V4SI result via UNSPEC_VSX_CVDPUXWS; emits xvcvdpuxws.  The
;; first alternative places the result in a "v" register, the second
;; (discouraged with "?") in any "wa" register.
1905 (define_insn "vsx_xvcvdpuxws"
1906   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1907         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1908                      UNSPEC_VSX_CVDPUXWS))]
1909   "VECTOR_UNIT_VSX_P (V2DFmode)"
1910   "xvcvdpuxws %x0,%x1"
1911   [(set_attr "type" "vecdouble")])
1912
;; V2DI input to V4SF result via UNSPEC_VSX_CVSXDSP; emits xvcvsxdsp.  The
;; enabling condition tests V2DFmode even though neither operand has that
;; mode.
1913 (define_insn "vsx_xvcvsxdsp"
1914   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
1915         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
1916                      UNSPEC_VSX_CVSXDSP))]
1917   "VECTOR_UNIT_VSX_P (V2DFmode)"
1918   "xvcvsxdsp %x0,%x1"
1919   [(set_attr "type" "vecfloat")])
1920
;; V2DI input to V4SF result via UNSPEC_VSX_CVUXDSP; emits xvcvuxdsp.
;; NOTE(review): the "type" attribute here is "vecdouble" whereas the signed
;; sibling vsx_xvcvsxdsp uses "vecfloat" -- confirm whether the difference is
;; intentional.
1921 (define_insn "vsx_xvcvuxdsp"
1922   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
1923         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
1924                      UNSPEC_VSX_CVUXDSP))]
1925   "VECTOR_UNIT_VSX_P (V2DFmode)"
1926   "xvcvuxdsp %x0,%x1"
1927   [(set_attr "type" "vecdouble")])
1928
1929 ;; Convert from 32-bit to 64-bit types
1930 ;; Provide both vector and scalar targets
;; vsx_xvcvsxwdp: V4SI input to V2DF result via UNSPEC_VSX_CVSXWDP (the vector
;; target of the pair).
1931 (define_insn "vsx_xvcvsxwdp"
1932   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1933         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1934                      UNSPEC_VSX_CVSXWDP))]
1935   "VECTOR_UNIT_VSX_P (V2DFmode)"
1936   "xvcvsxwdp %x0,%x1"
1937   [(set_attr "type" "vecdouble")])
1938
;; Scalar-target variant of vsx_xvcvsxwdp: same V4SI input, unspec and
;; instruction, but the result operand is DF and the condition is TARGET_VSX.
1939 (define_insn "vsx_xvcvsxwdp_df"
1940   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1941         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
1942                    UNSPEC_VSX_CVSXWDP))]
1943   "TARGET_VSX"
1944   "xvcvsxwdp %x0,%x1"
1945   [(set_attr "type" "vecdouble")])
1946
;; V4SI input to V2DF result via UNSPEC_VSX_CVUXWDP; emits xvcvuxwdp.
1947 (define_insn "vsx_xvcvuxwdp"
1948   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1949         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1950                      UNSPEC_VSX_CVUXWDP))]
1951   "VECTOR_UNIT_VSX_P (V2DFmode)"
1952   "xvcvuxwdp %x0,%x1"
1953   [(set_attr "type" "vecdouble")])
1954
;; Scalar-target variant of vsx_xvcvuxwdp: same V4SI input, unspec and
;; instruction, but the result operand is DF and the condition is TARGET_VSX.
1955 (define_insn "vsx_xvcvuxwdp_df"
1956   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1957         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
1958                    UNSPEC_VSX_CVUXWDP))]
1959   "TARGET_VSX"
1960   "xvcvuxwdp %x0,%x1"
1961   [(set_attr "type" "vecdouble")])
1962
;; V4SF input to V2DI result via UNSPEC_VSX_CVSPSXDS; emits xvcvspsxds.  The
;; first alternative places the result in a "v" register.
1963 (define_insn "vsx_xvcvspsxds"
1964   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1965         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1966                      UNSPEC_VSX_CVSPSXDS))]
1967   "VECTOR_UNIT_VSX_P (V2DFmode)"
1968   "xvcvspsxds %x0,%x1"
1969   [(set_attr "type" "vecdouble")])
1970
;; V4SF input to V2DI result via UNSPEC_VSX_CVSPUXDS; emits xvcvspuxds.  The
;; first alternative places the result in a "v" register.
1971 (define_insn "vsx_xvcvspuxds"
1972   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1973         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1974                      UNSPEC_VSX_CVSPUXDS))]
1975   "VECTOR_UNIT_VSX_P (V2DFmode)"
1976   "xvcvspuxds %x0,%x1"
1977   [(set_attr "type" "vecdouble")])
1978
1979 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode,
1980 ;; since the xvrdpiz instruction does not truncate the value if the floating
1981 ;; point value is < LONG_MIN or > LONG_MAX.
;; Matches (float:V2DF (fix:V2DI x)) and emits a single xvrdpiz.  The
;; condition requires flag_unsafe_math_optimizations, !flag_trapping_math and
;; TARGET_FRIZ in addition to the VSX V2DF unit checks.
1982 (define_insn "*vsx_float_fix_v2df2"
1983   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1984         (float:V2DF
1985          (fix:V2DI
1986           (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
1987   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1988    && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
1989    && !flag_trapping_math && TARGET_FRIZ"
1990   "xvrdpiz %x0,%x1"
1991   [(set_attr "type" "vecdouble")
1992    (set_attr "fp_type" "fp_addsub_d")])
1993
1994 \f
1995 ;; Permute operations
1996
1997 ;; Build a V2DF/V2DI vector from two scalars
;; Two alternatives: alternative 0 emits xxpermdi with selector 0,
;; alternative 1 (destination constraint "we", sources "b") emits mtvsrdd.
;; In both cases the two source operands are swapped in the emitted
;; instruction when !BYTES_BIG_ENDIAN.
1998 (define_insn "vsx_concat_<mode>"
1999   [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we")
2000         (vec_concat:VSX_D
2001          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b")
2002          (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))]
2003   "VECTOR_MEM_VSX_P (<MODE>mode)"
2004 {
2005   if (which_alternative == 0)
2006     return (BYTES_BIG_ENDIAN
2007             ? "xxpermdi %x0,%x1,%x2,0"
2008             : "xxpermdi %x0,%x2,%x1,0");
2009
2010   else if (which_alternative == 1)
2011     return (BYTES_BIG_ENDIAN
2012             ? "mtvsrdd %x0,%1,%2"
2013             : "mtvsrdd %x0,%2,%1");
2014
2015   else
2016     gcc_unreachable ();
2017 }
2018   [(set_attr "type" "vecperm")])
2019
2020 ;; Special purpose concat using xxpermdi to glue two single precision values
2021 ;; together, relying on the fact that internally scalar floats are represented
2022 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
;; Represented as UNSPEC_VSX_CONCAT of two SF operands with a V2DF result.
;; The source operands are swapped in the xxpermdi when !BYTES_BIG_ENDIAN.
2023 (define_insn "vsx_concat_v2sf"
2024   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2025         (unspec:V2DF
2026          [(match_operand:SF 1 "vsx_register_operand" "ww")
2027           (match_operand:SF 2 "vsx_register_operand" "ww")]
2028          UNSPEC_VSX_CONCAT))]
2029   "VECTOR_MEM_VSX_P (V2DFmode)"
2030 {
2031   if (BYTES_BIG_ENDIAN)
2032     return "xxpermdi %x0,%x1,%x2,0";
2033   else
2034     return "xxpermdi %x0,%x2,%x1,0";
2035 }
2036   [(set_attr "type" "vecperm")])
2037
2038 ;; V4SImode initialization splitter
;; Builds a V4SI value from four SI operands, each a register or constant
;; integer (UNSPEC_VSX_VEC_INIT).  The insn itself only emits "#"; after
;; reload it is split by rs6000_split_v4si_init, which receives the whole
;; operand array including the two early-clobbered DI scratch registers
;; (operands 5 and 6).  Requires TARGET_DIRECT_MOVE_64BIT.
2039 (define_insn_and_split "vsx_init_v4si"
2040   [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2041         (unspec:V4SI
2042          [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2043           (match_operand:SI 2 "reg_or_cint_operand" "rn")
2044           (match_operand:SI 3 "reg_or_cint_operand" "rn")
2045           (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2046          UNSPEC_VSX_VEC_INIT))
2047    (clobber (match_scratch:DI 5 "=&r"))
2048    (clobber (match_scratch:DI 6 "=&r"))]
2049    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2050    "#"
2051    "&& reload_completed"
2052    [(const_int 0)]
2053 {
2054   rs6000_split_v4si_init (operands);
2055   DONE;
2056 })
2057
2058 ;; xxpermdi for little endian loads and stores.  We need several of
2059 ;; these since the form of the PARALLEL differs by mode.
;; Two-element (VSX_D) form: a register-to-register vec_select with element
;; order [1 0], emitted as xxpermdi with the same source register twice and
;; selector 2.  Only enabled when !BYTES_BIG_ENDIAN.
2060 (define_insn "*vsx_xxpermdi2_le_<mode>"
2061   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2062         (vec_select:VSX_D
2063           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2064           (parallel [(const_int 1) (const_int 0)])))]
2065   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2066   "xxpermdi %x0,%x1,%x1,2"
2067   [(set_attr "type" "vecperm")])
2068
;; Four-element (VSX_W) form of the little-endian register swap: vec_select
;; with element order [2 3 0 1], emitted as xxpermdi with the same source
;; register twice and selector 2.
2069 (define_insn "*vsx_xxpermdi4_le_<mode>"
2070   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2071         (vec_select:VSX_W
2072           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2073           (parallel [(const_int 2) (const_int 3)
2074                      (const_int 0) (const_int 1)])))]
2075   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2076   "xxpermdi %x0,%x1,%x1,2"
2077   [(set_attr "type" "vecperm")])
2078
;; V8HI form of the little-endian register swap: vec_select with element
;; order [4..7 0..3], emitted as xxpermdi with the same source register twice
;; and selector 2.
2079 (define_insn "*vsx_xxpermdi8_le_V8HI"
2080   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2081         (vec_select:V8HI
2082           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2083           (parallel [(const_int 4) (const_int 5)
2084                      (const_int 6) (const_int 7)
2085                      (const_int 0) (const_int 1)
2086                      (const_int 2) (const_int 3)])))]
2087   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2088   "xxpermdi %x0,%x1,%x1,2"
2089   [(set_attr "type" "vecperm")])
2090
;; V16QI form of the little-endian register swap: vec_select with element
;; order [8..15 0..7], emitted as xxpermdi with the same source register twice
;; and selector 2.
2091 (define_insn "*vsx_xxpermdi16_le_V16QI"
2092   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2093         (vec_select:V16QI
2094           (match_operand:V16QI 1 "vsx_register_operand" "wa")
2095           (parallel [(const_int 8) (const_int 9)
2096                      (const_int 10) (const_int 11)
2097                      (const_int 12) (const_int 13)
2098                      (const_int 14) (const_int 15)
2099                      (const_int 0) (const_int 1)
2100                      (const_int 2) (const_int 3)
2101                      (const_int 4) (const_int 5)
2102                      (const_int 6) (const_int 7)])))]
2103   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
2104   "xxpermdi %x0,%x1,%x1,2"
2105   [(set_attr "type" "vecperm")])
2106
2107 ;; lxvd2x for little endian loads.  We need several of
2108 ;; these since the form of the PARALLEL differs by mode.
;; Two-element (VSX_D) form: a load from a "Z" memory operand wrapped in a
;; vec_select with element order [1 0], emitted as a plain lxvd2x.  Only
;; enabled when !BYTES_BIG_ENDIAN and !TARGET_P9_VECTOR.
2109 (define_insn "*vsx_lxvd2x2_le_<mode>"
2110   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2111         (vec_select:VSX_D
2112           (match_operand:VSX_D 1 "memory_operand" "Z")
2113           (parallel [(const_int 1) (const_int 0)])))]
2114   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2115   "lxvd2x %x0,%y1"
2116   [(set_attr "type" "vecload")])
2117
;; Four-element (VSX_W) form of the little-endian lxvd2x load: the memory
;; operand is wrapped in a vec_select with element order [2 3 0 1].
2118 (define_insn "*vsx_lxvd2x4_le_<mode>"
2119   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2120         (vec_select:VSX_W
2121           (match_operand:VSX_W 1 "memory_operand" "Z")
2122           (parallel [(const_int 2) (const_int 3)
2123                      (const_int 0) (const_int 1)])))]
2124   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2125   "lxvd2x %x0,%y1"
2126   [(set_attr "type" "vecload")])
2127
;; V8HI form of the little-endian lxvd2x load: the memory operand is wrapped
;; in a vec_select with element order [4..7 0..3].
2128 (define_insn "*vsx_lxvd2x8_le_V8HI"
2129   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2130         (vec_select:V8HI
2131           (match_operand:V8HI 1 "memory_operand" "Z")
2132           (parallel [(const_int 4) (const_int 5)
2133                      (const_int 6) (const_int 7)
2134                      (const_int 0) (const_int 1)
2135                      (const_int 2) (const_int 3)])))]
2136   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2137   "lxvd2x %x0,%y1"
2138   [(set_attr "type" "vecload")])
2139
;; V16QI form of the little-endian lxvd2x load: the memory operand is wrapped
;; in a vec_select with element order [8..15 0..7].
2140 (define_insn "*vsx_lxvd2x16_le_V16QI"
2141   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2142         (vec_select:V16QI
2143           (match_operand:V16QI 1 "memory_operand" "Z")
2144           (parallel [(const_int 8) (const_int 9)
2145                      (const_int 10) (const_int 11)
2146                      (const_int 12) (const_int 13)
2147                      (const_int 14) (const_int 15)
2148                      (const_int 0) (const_int 1)
2149                      (const_int 2) (const_int 3)
2150                      (const_int 4) (const_int 5)
2151                      (const_int 6) (const_int 7)])))]
2152   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2153   "lxvd2x %x0,%y1"
2154   [(set_attr "type" "vecload")])
2155
2156 ;; stxvd2x for little endian stores.  We need several of
2157 ;; these since the form of the PARALLEL differs by mode.
;; Two-element (VSX_D) form: a store to a "Z" memory operand of a register
;; wrapped in a vec_select with element order [1 0], emitted as a plain
;; stxvd2x.  Only enabled when !BYTES_BIG_ENDIAN and !TARGET_P9_VECTOR.
2158 (define_insn "*vsx_stxvd2x2_le_<mode>"
2159   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
2160         (vec_select:VSX_D
2161           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2162           (parallel [(const_int 1) (const_int 0)])))]
2163   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2164   "stxvd2x %x1,%y0"
2165   [(set_attr "type" "vecstore")])
2166
;; Four-element (VSX_W) form of the little-endian stxvd2x store: the source
;; register is wrapped in a vec_select with element order [2 3 0 1].
2167 (define_insn "*vsx_stxvd2x4_le_<mode>"
2168   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
2169         (vec_select:VSX_W
2170           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2171           (parallel [(const_int 2) (const_int 3)
2172                      (const_int 0) (const_int 1)])))]
2173   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2174   "stxvd2x %x1,%y0"
2175   [(set_attr "type" "vecstore")])
2176
;; V8HI form of the little-endian stxvd2x store: the source register is
;; wrapped in a vec_select with element order [4..7 0..3].
2177 (define_insn "*vsx_stxvd2x8_le_V8HI"
2178   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
2179         (vec_select:V8HI
2180           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2181           (parallel [(const_int 4) (const_int 5)
2182                      (const_int 6) (const_int 7)
2183                      (const_int 0) (const_int 1)
2184                      (const_int 2) (const_int 3)])))]
2185   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2186   "stxvd2x %x1,%y0"
2187   [(set_attr "type" "vecstore")])
2188
;; V16QI form of the little-endian stxvd2x store: the source register is
;; wrapped in a vec_select with element order [8..15 0..7].
2189 (define_insn "*vsx_stxvd2x16_le_V16QI"
2190   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
2191         (vec_select:V16QI
2192           (match_operand:V16QI 1 "vsx_register_operand" "wa")
2193           (parallel [(const_int 8) (const_int 9)
2194                      (const_int 10) (const_int 11)
2195                      (const_int 12) (const_int 13)
2196                      (const_int 14) (const_int 15)
2197                      (const_int 0) (const_int 1)
2198                      (const_int 2) (const_int 3)
2199                      (const_int 4) (const_int 5)
2200                      (const_int 6) (const_int 7)])))]
2201   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2202   "stxvd2x %x1,%y0"
2203   [(set_attr "type" "vecstore")])
2204
2205 ;; Convert a TImode value into V1TImode
;; Element-set expander for the single-element V1TI vector.  Operand 0 is the
;; destination, operand 1 the incoming V1TI vector, operand 2 the TImode value
;; to store and operand 3 the element index, which must be 0.  Because the
;; vector has exactly one element, the result is simply operand 2 viewed as
;; V1TI; the old contents in operand 1 do not contribute.  (Moving operand 1,
;; as this expander previously did, copied the old vector and silently
;; dropped the new value.)
2206 (define_expand "vsx_set_v1ti"
2207   [(match_operand:V1TI 0 "nonimmediate_operand" "")
2208    (match_operand:V1TI 1 "nonimmediate_operand" "")
2209    (match_operand:TI 2 "input_operand" "")
2210    (match_operand:QI 3 "u5bit_cint_operand" "")]
2211   "VECTOR_MEM_VSX_P (V1TImode)"
2212 {
2213   if (operands[3] != const0_rtx)
2214     gcc_unreachable ();
2215
2216   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
2217   DONE;
2218 })
2219
2220 ;; Set the element of a V2DI/V2DF mode
;; Operand 1 is the incoming vector, operand 2 the scalar to insert and
;; operand 3 the constant element index (UNSPEC_VSX_SET).  idx_first is 0 on
;; big endian and 1 on little endian.  When the index equals idx_first the
;; template is "xxpermdi %x0,%x2,%x1,1" (scalar first); when it equals
;; 1 - idx_first the template is "xxpermdi %x0,%x1,%x2,0" (vector first).
;; Any other index is treated as unreachable.
2221 (define_insn "vsx_set_<mode>"
2222   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
2223         (unspec:VSX_D
2224          [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
2225           (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
2226           (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
2227          UNSPEC_VSX_SET))]
2228   "VECTOR_MEM_VSX_P (<MODE>mode)"
2229 {
2230   int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
2231   if (INTVAL (operands[3]) == idx_first)
2232     return \"xxpermdi %x0,%x2,%x1,1\";
2233   else if (INTVAL (operands[3]) == 1 - idx_first)
2234     return \"xxpermdi %x0,%x1,%x2,0\";
2235   else
2236     gcc_unreachable ();
2237 }
2238   [(set_attr "type" "vecperm")])
2239
2240 ;; Extract a DF/DI element from V2DF/V2DI
2241 ;; Optimize cases where we can do a simple or direct move.
2242 ;; Or see if we can avoid doing the move at all
2243
2244 ;; There are some unresolved problems with reload that show up if an Altivec
2245 ;; register was picked.  Limit the scalar value to FPRs for now.
2246
;; Operand 1 is the source vector (asserted to be in a VSX register) and
;; operand 2 the element number, asserted to be 0 or 1.  The output code
;; picks the instruction from the element and the hard registers chosen:
;;   - element == VECTOR_ELEMENT_SCALAR_64BIT: nothing but an assembler
;;     comment if source and destination are the same register; mfvsrd to an
;;     integer register (needs TARGET_DIRECT_MOVE and TARGET_POWERPC64); fmr
;;     between two FP registers; otherwise xxlor within the VSX registers.
;;   - element == VECTOR_ELEMENT_MFVSRLD_64BIT with an integer destination
;;     (needs TARGET_P9_VECTOR, TARGET_POWERPC64, TARGET_DIRECT_MOVE): mfvsrld.
;;   - any other case with a VSX destination: xxpermdi, whose selector is
;;     element << 1 on big endian and 3 - (element << 1) on little endian; the
;;     selector is passed to the template through operands[3].
;; NOTE(review): the "type" attribute lists veclogical,mftgpr,mftgpr,vecperm,
;; but judging by the constraints the second alternative (d <- <VSa>, n) looks
;; like the xxpermdi case and the fourth (wr <- wo, n) like the mfvsrld case
;; -- confirm whether the attribute order matches the alternatives.
2247 (define_insn "vsx_extract_<mode>"
2248   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
2249
2250         (vec_select:<VS_scalar>
2251          (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
2252
2253          (parallel
2254           [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
2255   "VECTOR_MEM_VSX_P (<MODE>mode)"
2256 {
2257   int element = INTVAL (operands[2]);
2258   int op0_regno = REGNO (operands[0]);
2259   int op1_regno = REGNO (operands[1]);
2260   int fldDM;
2261
2262   gcc_assert (IN_RANGE (element, 0, 1));
2263   gcc_assert (VSX_REGNO_P (op1_regno));
2264
2265   if (element == VECTOR_ELEMENT_SCALAR_64BIT)
2266     {
2267       if (op0_regno == op1_regno)
2268         return ASM_COMMENT_START " vec_extract to same register";
2269
2270       else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
2271                && TARGET_POWERPC64)
2272         return "mfvsrd %0,%x1";
2273
2274       else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
2275         return "fmr %0,%1";
2276
2277       else if (VSX_REGNO_P (op0_regno))
2278         return "xxlor %x0,%x1,%x1";
2279
2280       else
2281         gcc_unreachable ();
2282     }
2283
2284   else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
2285            && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
2286     return "mfvsrld %0,%x1";
2287
2288   else if (VSX_REGNO_P (op0_regno))
2289     {
2290       fldDM = element << 1;
2291       if (!BYTES_BIG_ENDIAN)
2292         fldDM = 3 - fldDM;
2293       operands[3] = GEN_INT (fldDM);
2294       return "xxpermdi %x0,%x1,%x1,%3";
2295     }
2296
2297   else
2298     gcc_unreachable ();
2299 }
2300   [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
2301
2302 ;; Optimize extracting a single scalar element from memory.
     ;; The insn is always output as "#" and split once reload has completed:
     ;; the vec_select is replaced by a plain move into operand 0 from the rtx
     ;; returned by rs6000_adjust_vec_address.  Operand 3 is an early-clobber
     ;; base-register scratch that is handed to that helper.
2303 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
2304   [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
2305         (vec_select:<VSX_D:VS_scalar>
2306          (match_operand:VSX_D 1 "memory_operand" "m,m")
2307          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
2308    (clobber (match_scratch:P 3 "=&b,&b"))]
2309   "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
2310   "#"
2311   "&& reload_completed"
2312   [(set (match_dup 0) (match_dup 4))]
2313 {
       /* Operand 4 is the source of the replacement move.  */
2314   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2315                                            operands[3], <VSX_D:VS_scalar>mode);
2316 }
2317   [(set_attr "type" "fpload,load")
2318    (set_attr "length" "8")])
2319
2320 ;; Optimize storing a single scalar element that is in the right location to
2321 ;; memory.  The element number (operand 2) must satisfy the vsx_scalar_64bit
     ;; predicate, so no permute is emitted; the three alternatives select the
     ;; store template (stfd, stxsd..x or stxsd) from the source register and
     ;; memory constraints.
2322 (define_insn "*vsx_extract_<mode>_store"
2323   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
2324         (vec_select:<VS_scalar>
2325          (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
2326          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2327   "VECTOR_MEM_VSX_P (<MODE>mode)"
2328   "@
2329    stfd%U0%X0 %1,%0
2330    stxsd%U0x %x1,%y0
2331    stxsd %1,%0"
2332   [(set_attr "type" "fpstore")
2333    (set_attr "length" "4")])
2334
2335 ;; Variable V2DI/V2DF extract shift
     ;; Emits a vslo of vector operand 1 by V2DI operand 2, represented as
     ;; UNSPEC_VSX_VSLO with a scalar-mode result.  All three operands use the
     ;; "v" constraint.
2336 (define_insn "vsx_vslo_<mode>"
2337   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
2338         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
2339                              (match_operand:V2DI 2 "gpc_reg_operand" "v")]
2340                             UNSPEC_VSX_VSLO))]
2341   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2342   "vslo %0,%1,%2"
2343   [(set_attr "type" "vecperm")])
2344
2345 ;; Variable V2DI/V2DF extract
     ;; The element number is held in a DImode register (operand 2) and the
     ;; vector may be in a register or in memory.  The insn is always output as
     ;; "#" and split after reload by rs6000_split_vec_extract_var, which is
     ;; given the DI scratch (operand 3) and the V2DI scratch (operand 4, which
     ;; is "X" for the two memory alternatives).
2346 (define_insn_and_split "vsx_extract_<mode>_var"
2347   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
2348         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
2349                              (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2350                             UNSPEC_VSX_EXTRACT))
2351    (clobber (match_scratch:DI 3 "=r,&b,&b"))
2352    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2353   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2354   "#"
2355   "&& reload_completed"
2356   [(const_int 0)]
2357 {
2358   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2359                                 operands[3], operands[4]);
2360   DONE;
2361 })
2362
2363 ;; Extract a SF element from V4SF
     ;; The insn is always split: unless the wanted word is already word 0
     ;; (after adjusting the element number for endianness) the vector is first
     ;; rotated with xxsldwi, and the result is then converted with
     ;; vsx_xscvspdp_scalar2.
2364 (define_insn_and_split "vsx_extract_v4sf"
2365   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2366         (vec_select:SF
2367          (match_operand:V4SF 1 "vsx_register_operand" "wa")
2368          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
2369    (clobber (match_scratch:V4SF 3 "=0"))]
2370   "VECTOR_UNIT_VSX_P (V4SFmode)"
2371   "#"
2372   "&& 1"
2373   [(const_int 0)]
2374 {
2375   rtx vec = operands[1];
2376   HOST_WIDE_INT word = INTVAL (operands[2]);
2377
2378   /* Mirror the element number when bytes are not big endian.  */
2379   if (!BYTES_BIG_ENDIAN)
2380     word = 3 - word;
2381
2382   /* Word 0 needs no shift; any other word is rotated into place.  */
2383   if (word != 0)
2384     {
2385       rtx shifted = operands[3];
2386       if (GET_CODE (shifted) == SCRATCH)
2387         shifted = gen_reg_rtx (V4SFmode);
2388       emit_insn (gen_vsx_xxsldwi_v4sf (shifted, vec, vec, GEN_INT (word)));
2389       vec = shifted;
2390     }
2391   emit_insn (gen_vsx_xscvspdp_scalar2 (operands[0], vec));
2392   DONE;
2393 }
2394   [(set_attr "length" "8")
2395    (set_attr "type" "fp")])
2396
     ;; Extract a single SF element of a V4SF vector that lives in memory.
     ;; The insn is always output as "#" and split once reload has completed:
     ;; the vec_select is replaced by a plain move into operand 0 from the rtx
     ;; returned by rs6000_adjust_vec_address (called with SFmode).  Operand 3
     ;; is an early-clobber base-register scratch handed to that helper.
2397 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
2398   [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
2399         (vec_select:SF
2400          (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
2401          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
2402    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
2403   "VECTOR_MEM_VSX_P (V4SFmode)"
2404   "#"
2405   "&& reload_completed"
2406   [(set (match_dup 0) (match_dup 4))]
2407 {
       /* Operand 4 is the source of the replacement move.  */
2408   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2409                                            operands[3], SFmode);
2410 }
2411   [(set_attr "type" "fpload,fpload,fpload,load")
2412    (set_attr "length" "8")])
2413
2414 ;; Variable V4SF extract
     ;; The element number is held in a DImode register (operand 2) and the
     ;; vector may be in a register or in memory.  The insn is always output as
     ;; "#" and split after reload by rs6000_split_vec_extract_var, which is
     ;; given the DI scratch (operand 3) and the V2DI scratch (operand 4, which
     ;; is "X" for the two memory alternatives).
2415 (define_insn_and_split "vsx_extract_v4sf_var"
2416   [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
2417         (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
2418                     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2419                    UNSPEC_VSX_EXTRACT))
2420    (clobber (match_scratch:DI 3 "=r,&b,&b"))
2421    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2422   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
2423   "#"
2424   "&& reload_completed"
2425   [(const_int 0)]
2426 {
2427   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2428                                 operands[3], operands[4]);
2429   DONE;
2430 })
2431
2432 ;; Expand the builtin form of xxpermdi to canonical rtl.
     ;; Operand 3 is the 2-bit xxpermdi mask.  It is turned into the two
     ;; selector constants expected by vsx_xxpermdi2_<mode>_1; V2DF is emitted
     ;; directly and every other mode is emitted through the V2DI pattern.
2433 (define_expand "vsx_xxpermdi_<mode>"
2434   [(match_operand:VSX_L 0 "vsx_register_operand")
2435    (match_operand:VSX_L 1 "vsx_register_operand")
2436    (match_operand:VSX_L 2 "vsx_register_operand")
2437    (match_operand:QI 3 "u5bit_cint_operand")]
2438   "VECTOR_MEM_VSX_P (<MODE>mode)"
2439 {
2440   rtx dest = operands[0];
2441   rtx in0 = operands[1];
2442   rtx in1 = operands[2];
2443   int mask = INTVAL (operands[3]);
2444   /* Mask bit 1 becomes the first selector (0..1); mask bit 0, biased by 2,
2445      becomes the second selector (2..3).  */
2446   rtx sel0 = GEN_INT ((mask >> 1) & 1);
2447   rtx sel1 = GEN_INT ((mask & 1) + 2);
2448
2449   if (<MODE>mode == V2DFmode)
2450     {
2451       emit_insn (gen_vsx_xxpermdi2_v2df_1 (dest, in0, in1, sel0, sel1));
2452       DONE;
2453     }
2454
2455   /* Anything that is not already V2DI is reinterpreted as V2DI.  */
2456   if (<MODE>mode != V2DImode)
2457     {
2458       dest = gen_lowpart (V2DImode, dest);
2459       in0 = gen_lowpart (V2DImode, in0);
2460       in1 = gen_lowpart (V2DImode, in1);
2461     }
2462   emit_insn (gen_vsx_xxpermdi2_v2di_1 (dest, in0, in1, sel0, sel1));
2463   DONE;
2464 })
2463
2464 ;; Special version of xxpermdi that retains big-endian semantics.
     ;; Operand 3 is the 2-bit xxpermdi mask.  On big-endian targets the
     ;; selectors are passed straight through; otherwise the inputs are swapped
     ;; and the selectors are pre-reversed so that the adjustment made later by
     ;; vsx_xxpermdi2_<mode>_1 is undone.
2465 (define_expand "vsx_xxpermdi_<mode>_be"
2466   [(match_operand:VSX_L 0 "vsx_register_operand")
2467    (match_operand:VSX_L 1 "vsx_register_operand")
2468    (match_operand:VSX_L 2 "vsx_register_operand")
2469    (match_operand:QI 3 "u5bit_cint_operand")]
2470   "VECTOR_MEM_VSX_P (<MODE>mode)"
2471 {
2472   rtx target = operands[0];
2473   rtx op0 = operands[1];
2474   rtx op1 = operands[2];
2475   int mask = INTVAL (operands[3]);
       /* Mask bit 1 becomes the first selector (0..1); mask bit 0, biased by 2,
          becomes the second selector (2..3).  */
2476   rtx perm0 = GEN_INT ((mask >> 1) & 1);
2477   rtx perm1 = GEN_INT ((mask & 1) + 2);
2478   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2479
       /* V2DF has its own generator; every other mode goes through the V2DI
          generator, reinterpreting the operands as V2DI when necessary.  */
2480   if (<MODE>mode == V2DFmode)
2481     gen = gen_vsx_xxpermdi2_v2df_1;
2482   else
2483     {
2484       gen = gen_vsx_xxpermdi2_v2di_1;
2485       if (<MODE>mode != V2DImode)
2486         {
2487           target = gen_lowpart (V2DImode, target);
2488           op0 = gen_lowpart (V2DImode, op0);
2489           op1 = gen_lowpart (V2DImode, op1);
2490         }
2491     }
2492   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2493      transformation we don't want; it is necessary for
2494      rs6000_expand_vec_perm_const_1 but not for this use.  So we
2495      prepare for that by reversing the transformation here.  */
2496   if (BYTES_BIG_ENDIAN)
2497     emit_insn (gen (target, op0, op1, perm0, perm1));
2498   else
2499     {
           /* Swap the two inputs and use 3 - selector, with the two selectors
              exchanged.  */
2500       rtx p0 = GEN_INT (3 - INTVAL (perm1));
2501       rtx p1 = GEN_INT (3 - INTVAL (perm0));
2502       emit_insn (gen (target, op1, op0, p0, p1));
2503     }
2504   DONE;
2505 })
2506
     ;; Canonical xxpermdi: select one doubleword from each half of the
     ;; concatenation of operands 1 and 2.  Operand 3 (0..1) picks from the
     ;; first input and operand 4 (2..3) from the second.  The output code
     ;; folds both selectors into the 2-bit immediate printed as %3.
2507 (define_insn "vsx_xxpermdi2_<mode>_1"
2508   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
2509         (vec_select:VSX_D
2510           (vec_concat:<VS_double>
2511             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
2512             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
2513           (parallel [(match_operand 3 "const_0_to_1_operand" "")
2514                      (match_operand 4 "const_2_to_3_operand" "")])))]
2515   "VECTOR_MEM_VSX_P (<MODE>mode)"
2516 {
2517   int sel_first = INTVAL (operands[3]);
2518   int sel_second = INTVAL (operands[4]);
2519
2520   /* For little endian, swap operands and invert/swap selectors
2521      to get the correct xxpermdi.  The operand swap sets up the
2522      inputs as a little endian array.  The selectors are swapped
2523      because they are defined to use big endian ordering.  The
2524      selectors are inverted to get the correct doublewords for
2525      little endian ordering.  */
2526   if (!BYTES_BIG_ENDIAN)
2527     {
2528       int inverted = 3 - sel_second;
2529       sel_second = 3 - sel_first;
2530       sel_first = inverted;
2531     }
2532
2533   /* The first selector supplies the high bit of the immediate and the
2534      second selector, less its bias of 2, supplies the low bit.  */
2535   operands[3] = GEN_INT ((sel_first << 1) | (sel_second - 2));
2536
2537   return (BYTES_BIG_ENDIAN
2538           ? "xxpermdi %x0,%x1,%x2,%3"
2539           : "xxpermdi %x0,%x2,%x1,%3");
2540 }
2544   [(set_attr "type" "vecperm")])
2545
2546 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
2547 ;; none of the small types were allowed in a vector register, so we had to
2548 ;; extract to a DImode and either do a direct move or store.
     ;; When TARGET_VSX_SMALL_INTEGER and TARGET_P9_VECTOR are both set the
     ;; expander emits vsx_extract_<mode>_p9 and is done; otherwise it falls
     ;; through and the vec_select plus scratch clobber from the template is
     ;; emitted as written.
2549 (define_expand  "vsx_extract_<mode>"
2550   [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
2551                    (vec_select:<VS_scalar>
2552                     (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
2553                     (parallel [(match_operand:QI 2 "const_int_operand")])))
2554               (clobber (match_scratch:VSX_EXTRACT_I 3))])]
2555   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2556 {
2557   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
2558   if (TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR)
2559     {
2560       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
2561                                             operands[2]));
2562       DONE;
2563     }
2564 })
2565
     ;; ISA 3.0 extract of a small integer element.  Alternative 0 (GPR
     ;; destination with an SI scratch) is output as "#".  Alternative 1
     ;; converts the element number into a byte offset and emits xxextractuw for
     ;; 4-byte elements or vextractu<wd> otherwise.
2566 (define_insn "vsx_extract_<mode>_p9"
2567   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
2568         (vec_select:<VS_scalar>
2569          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
2570          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
2571    (clobber (match_scratch:SI 3 "=r,X"))]
2572   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2573    && TARGET_VSX_SMALL_INTEGER"
2574 {
2575   /* The GPR alternative is not emitted directly.  */
2576   if (which_alternative == 0)
2577     return "#";
2578
2579   HOST_WIDE_INT bytes_per_elt = GET_MODE_UNIT_SIZE (<MODE>mode);
2580   HOST_WIDE_INT index = INTVAL (operands[2]);
2581
2582   /* Count from the other end when element order is not big endian.  */
2583   if (!VECTOR_ELT_ORDER_BIG)
2584     index = GET_MODE_NUNITS (<MODE>mode) - 1 - index;
2585
2586   /* The templates take a byte offset rather than an element number, so
2587      operand 2 is rewritten before it is printed.  */
2588   operands[2] = GEN_INT (bytes_per_elt * index);
2589
2590   return (bytes_per_elt == 4
2591           ? "xxextractuw %x0,%x1,%2"
2592           : "vextractu<wd> %0,%1,%2");
2593 }
2595   [(set_attr "type" "vecsimple")])
2596
     ;; After reload, split an extract from an Altivec register into an integer
     ;; register: load the element's byte offset into the SI scratch (operand 3)
     ;; and emit vextu<wd>lx or vextu<wd>rx depending on VECTOR_ELT_ORDER_BIG.
2597 (define_split
2598   [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
2599         (vec_select:<VS_scalar>
2600          (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
2601          (parallel [(match_operand:QI 2 "const_int_operand")])))
2602    (clobber (match_operand:SI 3 "int_reg_operand"))]
2603   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2604    && TARGET_VSX_SMALL_INTEGER && reload_completed"
2605   [(const_int 0)]
2606 {
2607   rtx vec = operands[1];
2608   rtx idx_reg = operands[3];
2609   rtx dest_si = gen_rtx_REG (SImode, REGNO (operands[0]));
2610   HOST_WIDE_INT byte_off
2611     = INTVAL (operands[2]) * GET_MODE_UNIT_SIZE (<MODE>mode);
2612
2613   /* The byte offset goes in the scratch; the result is written in SImode.  */
2614   emit_move_insn (idx_reg, GEN_INT (byte_off));
2615   emit_insn (VECTOR_ELT_ORDER_BIG
2616              ? gen_vextu<wd>lx (dest_si, idx_reg, vec)
2617              : gen_vextu<wd>rx (dest_si, idx_reg, vec));
2618   DONE;
2619 })
2620
2621 ;; Optimize zero extracts to eliminate the AND after the extract.
     ;; After reload the zero_extend wrapper is dropped: the split re-emits the
     ;; plain vec_select (with the same scratch clobber) into operand 4, which
     ;; is operand 0's hard register viewed in the element's scalar mode.
2622 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
2623   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
2624         (zero_extend:DI
2625          (vec_select:<VS_scalar>
2626           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
2627           (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
2628    (clobber (match_scratch:SI 3 "=r,X"))]
2629   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2630    && TARGET_VSX_SMALL_INTEGER"
2631   "#"
2632   "&& reload_completed"
2633   [(parallel [(set (match_dup 4)
2634                    (vec_select:<VS_scalar>
2635                     (match_dup 1)
2636                     (parallel [(match_dup 2)])))
2637               (clobber (match_dup 3))])]
2638 {
       /* Same hard register as operand 0, but in the scalar element mode.  */
2639   operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
2640 })
2641
2642 ;; Optimize stores to use the ISA 3.0 scalar store instructions
     ;; After reload this is split into two insns: an extract of the element
     ;; into the scalar scratch (operand 3, with operand 4 as the extract's own
     ;; clobber), followed by a store of that scratch to the memory destination.
2643 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
2644   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
2645         (vec_select:<VS_scalar>
2646          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
2647          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
2648    (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
2649    (clobber (match_scratch:SI 4 "=X,&r"))]
2650   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
2651    && TARGET_VSX_SMALL_INTEGER"
2652   "#"
2653   "&& reload_completed"
2654   [(parallel [(set (match_dup 3)
2655                    (vec_select:<VS_scalar>
2656                     (match_dup 1)
2657                     (parallel [(match_dup 2)])))
2658               (clobber (match_dup 4))])
2659    (set (match_dup 0)
2660         (match_dup 3))])
2661
     ;; Extract an SI element from V4SI when the ISA 3.0 small-integer path is
     ;; not available.  Split after reload: unless the (endian-adjusted)
     ;; element is already word 1, the element is first splatted into the V4SI
     ;; scratch; the result is then stored to memory or moved to the
     ;; destination register.
2662 (define_insn_and_split  "*vsx_extract_si"
2663   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
2664         (vec_select:SI
2665          (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
2666          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
2667    (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
2668   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT
2669    && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)"
2670   "#"
2671   "&& reload_completed"
2672   [(const_int 0)]
2673 {
2674   rtx dest = operands[0];
2675   rtx src = operands[1];
2676   rtx element = operands[2];
2677   rtx vec_tmp = operands[3];
2678   int value;
2679
       /* Count from the other end when element order is not big endian.  */
2680   if (!VECTOR_ELT_ORDER_BIG)
2681     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
2682
2683   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2684      instruction.  */
2685   value = INTVAL (element);
2686   if (value != 1)
2687     {
           /* NOTE(review): the insn condition above excludes
              TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER, so this first arm
              looks unreachable -- confirm.  */
2688       if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER)
2689         {
2690           rtx si_tmp = gen_rtx_REG (SImode, REGNO (vec_tmp));
2691           emit_insn (gen_vsx_extract_v4si_p9 (si_tmp,src, element));
2692         }
2693       else
2694         emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
2695     }
2696   else
2697     vec_tmp = src;
2698
       /* Memory destination: store from the vector register, as an SImode move
          when TARGET_VSX_SMALL_INTEGER and with stfiwx otherwise.  */
2699   if (MEM_P (operands[0]))
2700     {
2701       if (can_create_pseudo_p ())
2702         dest = rs6000_address_for_fpconvert (dest);
2703
2704       if (TARGET_VSX_SMALL_INTEGER)
2705         emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
2706       else
2707         emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
2708     }
2709
       /* Register destination: move in SImode when TARGET_VSX_SMALL_INTEGER,
          otherwise move the DImode views of both registers.  */
2710   else if (TARGET_VSX_SMALL_INTEGER)
2711     emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
2712   else
2713     emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
2714                     gen_rtx_REG (DImode, REGNO (vec_tmp)));
2715
2716   DONE;
2717 }
2718   [(set_attr "type" "mftgpr,vecperm,fpstore")
2719    (set_attr "length" "8")])
2720
     ;; Extract a QI/HI element from V16QI/V8HI when the ISA 3.0 small-integer
     ;; path is not available.  Split after reload: unless the
     ;; (endian-adjusted) element is already in the position that needs no
     ;; splat, splat it into the vector scratch, then move the DImode view of
     ;; that register to the DImode view of the destination.
2721 (define_insn_and_split  "*vsx_extract_<mode>_p8"
2722   [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
2723         (vec_select:<VS_scalar>
2724          (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
2725          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
2726    (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
2727   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
2728    && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)"
2729   "#"
2730   "&& reload_completed"
2731   [(const_int 0)]
2732 {
2733   rtx dest = operands[0];
2734   rtx vec = operands[1];
2735   rtx splat = operands[3];
2736   int elt = INTVAL (operands[2]);
2737
2738   /* Count from the other end when element order is not big endian.  */
2739   if (!VECTOR_ELT_ORDER_BIG)
2740     elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2741
2742   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2743      instruction.  */
2744   if (<MODE>mode == V16QImode)
2745     {
2746       if (elt == 7)
2747         splat = vec;
2748       else
2749         emit_insn (gen_altivec_vspltb_direct (splat, vec, GEN_INT (elt)));
2750     }
2751   else if (<MODE>mode == V8HImode)
2752     {
2753       if (elt == 3)
2754         splat = vec;
2755       else
2756         emit_insn (gen_altivec_vsplth_direct (splat, vec, GEN_INT (elt)));
2757     }
2758   else
2759     gcc_unreachable ();
2760
2761   /* Both registers are accessed in DImode for the final move.  */
2762   emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
2763                   gen_rtx_REG (DImode, REGNO (splat)));
2764   DONE;
2765 }
2766   [(set_attr "type" "mftgpr")])
2767
2768 ;; Optimize extracting a single scalar element from memory.
     ;; The insn is always output as "#" and split once reload has completed:
     ;; the vec_select is replaced by a plain move into operand 0 from the rtx
     ;; returned by rs6000_adjust_vec_address.  Operand 3 is an early-clobber
     ;; DImode base-register scratch handed to that helper.
2769 (define_insn_and_split "*vsx_extract_<mode>_load"
2770   [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
2771         (vec_select:<VS_scalar>
2772          (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
2773          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
2774    (clobber (match_scratch:DI 3 "=&b"))]
2775   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2776   "#"
2777   "&& reload_completed"
2778   [(set (match_dup 0) (match_dup 4))]
2779 {
       /* Operand 4 is the source of the replacement move.  */
2780   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2781                                            operands[3], <VS_scalar>mode);
2782 }
2783   [(set_attr "type" "load")
2784    (set_attr "length" "8")])
2785
2786 ;; Variable V16QI/V8HI/V4SI extract
     ;; The element number is held in a DImode register (operand 2) and the
     ;; vector may be in a register or in memory.  The insn is always output as
     ;; "#" and split after reload by rs6000_split_vec_extract_var, which is
     ;; given the DI scratch (operand 3) and the V2DI scratch (operand 4, a
     ;; real register only for the second alternative).
2787 (define_insn_and_split "vsx_extract_<mode>_var"
2788   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
2789         (unspec:<VS_scalar>
2790          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
2791           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2792          UNSPEC_VSX_EXTRACT))
2793    (clobber (match_scratch:DI 3 "=r,r,&b"))
2794    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
2795   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2796   "#"
2797   "&& reload_completed"
2798   [(const_int 0)]
2799 {
2800   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2801                                 operands[3], operands[4]);
2802   DONE;
2803 })
2804
     ;; Variable V16QI/V8HI/V4SI extract whose result is zero-extended to
     ;; SI/DI.  Split after reload by rs6000_split_vec_extract_var, which is
     ;; handed operand 0's hard register viewed in the element's scalar mode
     ;; instead of the wider SDI destination.
2805 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
2806   [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
2807         (zero_extend:SDI
2808          (unspec:<VSX_EXTRACT_I:VS_scalar>
2809           [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
2810            (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2811           UNSPEC_VSX_EXTRACT)))
2812    (clobber (match_scratch:DI 3 "=r,r,&b"))
2813    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
2814   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2815   "#"
2816   "&& reload_completed"
2817   [(const_int 0)]
2818 {
       /* The destination is an integer register holding one element, so it
          must be described in the element's scalar mode (QI/HI/SI), not in
          the vector mode of operand 1.  */
2819   machine_mode smode = <VSX_EXTRACT_I:VS_scalar>mode;
2820   rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
2821                                 operands[1], operands[2],
2822                                 operands[3], operands[4]);
2823   DONE;
2824 })
2825
2826 ;; VSX_EXTRACT optimizations
2827 ;; Optimize double d = (double) vec_extract (vi, <n>)
2828 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
     ;; The insn is always split.  Unless the (endian-adjusted) element is
     ;; already word 0 it is splatted first, using the V4SI scratch (operand 3)
     ;; or a fresh pseudo when that is still a SCRATCH.
2829 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
2830   [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
2831         (any_float:DF
2832          (vec_select:SI
2833           (match_operand:V4SI 1 "gpc_reg_operand" "v")
2834           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
2835    (clobber (match_scratch:V4SI 3 "=v"))]
2836   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2837   "#"
2838   "&& 1"
2839   [(const_int 0)]
2840 {
2841   rtx vec = operands[1];
2842   int word = INTVAL (operands[2]);
2843
2844   if (!VECTOR_ELT_ORDER_BIG)
2845     word = GET_MODE_NUNITS (V4SImode) - 1 - word;
2846
2847   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2848      instruction.  */
2849   if (word != 0)
2850     {
2851       rtx splat = operands[3];
2852
2853       /* A scratch that has no register yet is replaced by a new pseudo.  */
2854       if (GET_CODE (splat) == SCRATCH)
2855         splat = gen_reg_rtx (V4SImode);
2856
2857       emit_insn (gen_altivec_vspltw_direct (splat, vec, GEN_INT (word)));
2858       vec = splat;
2859     }
2860
2861   emit_insn (gen_vsx_xvcv<su>xwdp_df (operands[0], vec));
2862   DONE;
2863 })
2865
2866 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
2867 ;; where <type> is a floating point type that supported by the hardware that is
2868 ;; not double.  First convert the value to double, and then to the desired
2869 ;; type.
     ;; Operand 3 is a V4SI scratch for the splat and operand 4 a DF scratch
     ;; for the intermediate double; either is replaced by a fresh pseudo when
     ;; it is still a SCRATCH at split time.
2870 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
2871   [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
2872         (any_float:VSX_EXTRACT_FL
2873          (vec_select:SI
2874           (match_operand:V4SI 1 "gpc_reg_operand" "v")
2875           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
2876    (clobber (match_scratch:V4SI 3 "=v"))
2877    (clobber (match_scratch:DF 4 "=ws"))]
2878   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2879   "#"
2880   "&& 1"
2881   [(const_int 0)]
2882 {
2883   rtx dest = operands[0];
2884   rtx src = operands[1];
2885   rtx element = operands[2];
2886   rtx v4si_tmp = operands[3];
2887   rtx df_tmp = operands[4];
2888   int value;
2889
       /* Count from the other end when element order is not big endian.  */
2890   if (!VECTOR_ELT_ORDER_BIG)
2891     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
2892
2893   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2894      instruction.  */
2895   value = INTVAL (element);
2896   if (value != 0)
2897     {
2898       if (GET_CODE (v4si_tmp) == SCRATCH)
2899         v4si_tmp = gen_reg_rtx (V4SImode);
2900       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
2901     }
2902   else
2903     v4si_tmp = src;
2904
2905   if (GET_CODE (df_tmp) == SCRATCH)
2906     df_tmp = gen_reg_rtx (DFmode);
2907
       /* Step 1: integer word to double.  */
2908   emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
2909
       /* Step 2: double to the requested mode.  SF truncates; the 128-bit
          modes extend, using the generator that matches the mode's format.
          Any other combination is a compiler bug.  */
2910   if (<MODE>mode == SFmode)
2911     emit_insn (gen_truncdfsf2 (dest, df_tmp));
2912   else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
2913     emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
2914   else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
2915            && TARGET_FLOAT128_HW)
2916     emit_insn (gen_extenddftf2_hw (dest, df_tmp));
2917   else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
2918     emit_insn (gen_extenddfif2 (dest, df_tmp));
2919   else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
2920     emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
2921   else
2922     gcc_unreachable ();
2923
2924   DONE;
2925 })
2926
2927 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
2928 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
2929 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
2930 ;; vector short or vector unsigned short.
     ;; Signed variant.  After reload this is split into three insns: extract
     ;; the element into the scalar scratch (operand 3), sign-extend that
     ;; scratch to DImode in the same hard register (operand 4), and convert
     ;; the DImode value with float.
2931 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
2932   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
2933         (float:FL_CONV
2934          (vec_select:<VSX_EXTRACT_I:VS_scalar>
2935           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
2936           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
2937    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
2938   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
2939    && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
2940   "#"
2941   "&& reload_completed"
2942   [(parallel [(set (match_dup 3)
2943                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
2944                     (match_dup 1)
2945                     (parallel [(match_dup 2)])))
2946               (clobber (scratch:SI))])
2947    (set (match_dup 4)
2948         (sign_extend:DI (match_dup 3)))
2949    (set (match_dup 0)
2950         (float:<FL_CONV:MODE> (match_dup 4)))]
2951 {
       /* Operand 4 is the scratch's hard register viewed in DImode.  */
2952   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
2953 })
2954
     ;; Unsigned variant of the pattern above.  After reload this is split into
     ;; two insns: extract the element into the scalar scratch (operand 3), then
     ;; convert the DImode view of that register (operand 4) with float.
     ;; NOTE(review): no explicit zero_extend step is emitted here; presumably
     ;; the extract leaves the upper bits of the register clear -- confirm.
2955 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
2956   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
2957         (unsigned_float:FL_CONV
2958          (vec_select:<VSX_EXTRACT_I:VS_scalar>
2959           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
2960           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
2961    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
2962   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
2963    && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
2964   "#"
2965   "&& reload_completed"
2966   [(parallel [(set (match_dup 3)
2967                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
2968                     (match_dup 1)
2969                     (parallel [(match_dup 2)])))
2970               (clobber (scratch:SI))])
2971    (set (match_dup 0)
2972         (float:<FL_CONV:MODE> (match_dup 4)))]
2973 {
       /* Operand 4 is the scratch's hard register viewed in DImode.  */
2974   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
2975 })
2976
2977 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
     ;; Operand 1 (tied to the destination) is the incoming vector, operand 2
     ;; the new scalar and operand 3 the element number.  The element number is
     ;; converted into a byte offset before the insert instruction is printed.
2978 (define_insn "vsx_set_<mode>_p9"
2979   [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
2980         (unspec:VSX_EXTRACT_I
2981          [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
2982           (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
2983           (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
2984          UNSPEC_VSX_SET))]
2985   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
2986    && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
2987 {
2988   int slot = INTVAL (operands[3]);
2989
2990   if (!VECTOR_ELT_ORDER_BIG)
2991     slot = GET_MODE_NUNITS (<MODE>mode) - 1 - slot;
2992
2993   /* The templates take a byte offset rather than an element number.  */
2994   operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * slot);
2995
2996   return (<MODE>mode == V4SImode
2997           ? "xxinsertw %x0,%x2,%3"
2998           : "vinsert<wd> %0,%2,%3");
2999 }
3000   [(set_attr "type" "vecperm")])
3001
3002 ;; Expanders for builtins
     ;; vsx_mergel builds a vec_select over the concatenation of operands 1 and
     ;; 2 using selectors (1, 3).  For little endian with big-endian element
     ;; order the inputs are swapped and selectors (0, 2) are used instead.
3003 (define_expand "vsx_mergel_<mode>"
3004   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3005    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3006    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3007   "VECTOR_MEM_VSX_P (<MODE>mode)"
3008 {
3009   /* Special handling for LE with -maltivec=be.  */
3010   bool swap_inputs = !BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG;
3011   rtx first = swap_inputs ? operands[2] : operands[1];
3012   rtx second = swap_inputs ? operands[1] : operands[2];
3013   int base = swap_inputs ? 0 : 1;
3014
3015   /* The second selector indexes the second input, hence the offset of 2.  */
3016   rtvec sel = gen_rtvec (2, GEN_INT (base), GEN_INT (base + 2));
3017   rtx pair = gen_rtx_VEC_CONCAT (<VS_double>mode, first, second);
3018   rtx picked = gen_rtx_VEC_SELECT (<MODE>mode, pair,
3019                                    gen_rtx_PARALLEL (VOIDmode, sel));
3020
3021   emit_insn (gen_rtx_SET (operands[0], picked));
3022   DONE;
3023 })
3028
     ;; vsx_mergeh builds a vec_select over the concatenation of operands 1 and
     ;; 2 using selectors (0, 2).  For little endian with big-endian element
     ;; order the inputs are swapped and selectors (1, 3) are used instead.
3029 (define_expand "vsx_mergeh_<mode>"
3030   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3031    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3032    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3033   "VECTOR_MEM_VSX_P (<MODE>mode)"
3034 {
3035   /* Special handling for LE with -maltivec=be.  */
3036   bool swap_inputs = !BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG;
3037   rtx first = swap_inputs ? operands[2] : operands[1];
3038   rtx second = swap_inputs ? operands[1] : operands[2];
3039   int base = swap_inputs ? 1 : 0;
3040
3041   /* The second selector indexes the second input, hence the offset of 2.  */
3042   rtvec sel = gen_rtvec (2, GEN_INT (base), GEN_INT (base + 2));
3043   rtx pair = gen_rtx_VEC_CONCAT (<VS_double>mode, first, second);
3044   rtx picked = gen_rtx_VEC_SELECT (<MODE>mode, pair,
3045                                    gen_rtx_PARALLEL (VOIDmode, sel));
3046
3047   emit_insn (gen_rtx_SET (operands[0], picked));
3048   DONE;
3049 })
3054
3055 ;; V2DF/V2DI splat
3056 ;; We separate the register splat insn from the memory splat insn to force the
3057 ;; register allocator to generate the indexed form of the SPLAT when it is
3058 ;; given an offsettable memory reference.  Otherwise, if the register and
3059 ;; memory insns were combined into a single insn, the register allocator will
3060 ;; load the value into a register, and then do a double word permute.
     ;; The expander only prepares operand 1: a memory operand has its address
     ;; adjusted by rs6000_address_for_fpconvert, and anything that is neither
     ;; memory nor a register is forced into a register.  The vec_duplicate
     ;; from the template is then emitted with the updated operand.
3061 (define_expand "vsx_splat_<mode>"
3062   [(set (match_operand:VSX_D 0 "vsx_register_operand")
3063         (vec_duplicate:VSX_D
3064          (match_operand:<VS_scalar> 1 "input_operand")))]
3065   "VECTOR_MEM_VSX_P (<MODE>mode)"
3066 {
3067   rtx op1 = operands[1];
3068   if (MEM_P (op1))
3069     operands[1] = rs6000_address_for_fpconvert (op1);
       /* Store the forced register back into operands[1]; the emitted pattern
          is built from the operands array, not from the local copy.  */
3070   else if (!REG_P (op1))
3071     operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
3072 })
3073
3074 (define_insn "vsx_splat_<mode>_reg"
     ;; Register-source form of the V2DF/V2DI splat.  The first alternative
     ;; duplicates operand 1 with xxpermdi (selector 0); the second, which
     ;; needs a "we" destination and a "b"-constrained source, emits mtvsrdd
     ;; with operand 1 used for both halves.
3075   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
3076         (vec_duplicate:VSX_D
3077          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
3078   "VECTOR_MEM_VSX_P (<MODE>mode)"
3079   "@
3080    xxpermdi %x0,%x1,%x1,0
3081    mtvsrdd %x0,%1,%1"
3082   [(set_attr "type" "vecperm")])
3083
3084 (define_insn "vsx_splat_<VSX_D:mode>_mem"
     ;; Memory-source form of the V2DF/V2DI splat: operand 1 is a
     ;; "Z"-constrained memory operand and the insn is a single lxvdsx,
     ;; typed as a vector load.
3085   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
3086         (vec_duplicate:VSX_D
3087          (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
3088   "VECTOR_MEM_VSX_P (<MODE>mode)"
3089   "lxvdsx %x0,%y1"
3090   [(set_attr "type" "vecload")])
3091
3092 ;; V4SI splat support
     ;; Enabled only for TARGET_P9_VECTOR.  Alternative 1 splats an SImode
     ;; value from an "r" register with mtvsrws; alternative 2 splats from a
     ;; "Z" memory operand with lxvwsx.  The "type" attribute is set per
     ;; alternative (vecperm, vecload).
3093 (define_insn "vsx_splat_v4si"
3094   [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
3095         (vec_duplicate:V4SI
3096          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
3097   "TARGET_P9_VECTOR"
3098   "@
3099    mtvsrws %x0,%1
3100    lxvwsx %x0,%y1"
3101   [(set_attr "type" "vecperm,vecload")])
3102
3103 ;; SImode is not currently allowed in vector registers.  This pattern
3104 ;; allows us to use direct move to get the value in a vector register
3105 ;; so that we can use XXSPLTW
     ;; The SImode element is written as (truncate:SI (reg:DI)).  With a "wj"
     ;; source the insn is xxspltw with word index 1; with an "r" source it
     ;; is mtvsrws.  Requires TARGET_DIRECT_MOVE_64BIT.
3106 (define_insn "vsx_splat_v4si_di"
3107   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
3108         (vec_duplicate:V4SI
3109          (truncate:SI
3110           (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
3111   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3112   "@
3113    xxspltw %x0,%x1,1
3114    mtvsrws %x0,%1"
3115   [(set_attr "type" "vecperm")])
3116
3117 ;; V4SF splat (ISA 3.0)
     ;; Three alternatives: memory source ("Z", lxvwsx), scalar already in a
     ;; "wy" register (template "#", i.e. always split), and GPR source ("r",
     ;; mtvsrws).  The split runs only after reload and only when operand 1
     ;; satisfies vsx_register_operand; it replaces the insn with an
     ;; UNSPEC_VSX_CVDPSPN of operand 1 into operand 0 followed by an
     ;; UNSPEC_VSX_XXSPLTW of operand 0 with index 0 (hence length 8 for that
     ;; alternative).
3118 (define_insn_and_split "vsx_splat_v4sf"
3119   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
3120         (vec_duplicate:V4SF
3121          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
3122   "TARGET_P9_VECTOR"
3123   "@
3124    lxvwsx %x0,%y1
3125    #
3126    mtvsrws %x0,%1"
3127   "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
3128   [(set (match_dup 0)
3129         (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
3130    (set (match_dup 0)
3131         (unspec:V4SF [(match_dup 0)
3132                       (const_int 0)] UNSPEC_VSX_XXSPLTW))]
3133   ""
3134   [(set_attr "type" "vecload,vecperm,mftgpr")
3135    (set_attr "length" "4,8,4")])
3136
3137 ;; V4SF/V4SI splat from a vector element
     ;; Operand 2 is the element number inside the vec_select; it is a
     ;; compile-time constant accepted by u5bit_cint_operand.
3138 (define_insn "vsx_xxspltw_<mode>"
3139   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3140         (vec_duplicate:VSX_W
3141          (vec_select:<VS_scalar>
3142           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3143           (parallel
3144            [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
3145   "VECTOR_MEM_VSX_P (<MODE>mode)"
3146 {
       /* When not big-endian, mirror the element number (3 - n) before it is
          printed as the xxspltw immediate.  */
3147   if (!BYTES_BIG_ENDIAN)
3148     operands[2] = GEN_INT (3 - INTVAL (operands[2]));
3149
3150   return "xxspltw %x0,%x1,%2";
3151 }
3152   [(set_attr "type" "vecperm")])
3153
3154 (define_insn "vsx_xxspltw_<mode>_direct"
     ;; Unspec (UNSPEC_VSX_XXSPLTW) form of xxspltw: operand 2 is printed
     ;; unchanged as the instruction immediate, with no endian adjustment in
     ;; this pattern.
3155   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3156         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3157                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
3158                       UNSPEC_VSX_XXSPLTW))]
3159   "VECTOR_MEM_VSX_P (<MODE>mode)"
3160   "xxspltw %x0,%x1,%2"
3161   [(set_attr "type" "vecperm")])
3162
3163 ;; V16QI/V8HI splat support on ISA 2.07
     ;; The scalar is a DImode value held in an Altivec ("v") register and
     ;; truncated to the element mode; the element index passed to the vsplt
     ;; instruction comes from the VSX_SPLAT_COUNT iterator attribute.
     ;; Requires TARGET_DIRECT_MOVE_64BIT.
3164 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
3165   [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
3166         (vec_duplicate:VSX_SPLAT_I
3167          (truncate:<VS_scalar>
3168           (match_operand:DI 1 "altivec_register_operand" "v"))))]
3169   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3170   "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
3171   [(set_attr "type" "vecperm")])
3172
3173 ;; V2DF/V2DI splat for use by vec_splat builtin
     ;; Operand 2 is the requested element number; it is translated into an
     ;; xxpermdi selector of 0 or 3 depending on the vector element order.
3174 (define_insn "vsx_xxspltd_<mode>"
3175   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3176         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
3177                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
3178                       UNSPEC_VSX_XXSPLTD))]
3179   "VECTOR_MEM_VSX_P (<MODE>mode)"
3180 {
       /* Selector 0 is used for element 0 in big element order and for
          element 1 otherwise; every other case uses selector 3.  */
3181   if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
3182       || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
3183     return "xxpermdi %x0,%x1,%x1,0";
3184   else
3185     return "xxpermdi %x0,%x1,%x1,3";
3186 }
3187   [(set_attr "type" "vecperm")])
3188
3189 ;; V4SF/V4SI interleave
     ;; RTL semantics: elements 0, 4, 1, 5 of the concatenation of operands 1
     ;; and 2.
3190 (define_insn "vsx_xxmrghw_<mode>"
3191   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3192         (vec_select:VSX_W
3193           (vec_concat:<VS_double>
3194             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3195             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
3196           (parallel [(const_int 0) (const_int 4)
3197                      (const_int 1) (const_int 5)])))]
3198   "VECTOR_MEM_VSX_P (<MODE>mode)"
3199 {
       /* Big-endian maps directly onto xxmrghw; otherwise the opposite merge
          (xxmrglw) is emitted with the two inputs swapped.  */
3200   if (BYTES_BIG_ENDIAN)
3201     return "xxmrghw %x0,%x1,%x2";
3202   else
3203     return "xxmrglw %x0,%x2,%x1";
3204 }
3205   [(set_attr "type" "vecperm")])
3206
3207 (define_insn "vsx_xxmrglw_<mode>"
     ;; RTL semantics: elements 2, 6, 3, 7 of the concatenation of operands 1
     ;; and 2.
3208   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3209         (vec_select:VSX_W
3210           (vec_concat:<VS_double>
3211             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3212             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
3213           (parallel [(const_int 2) (const_int 6)
3214                      (const_int 3) (const_int 7)])))]
3215   "VECTOR_MEM_VSX_P (<MODE>mode)"
3216 {
       /* Big-endian maps directly onto xxmrglw; otherwise the opposite merge
          (xxmrghw) is emitted with the two inputs swapped.  */
3217   if (BYTES_BIG_ENDIAN)
3218     return "xxmrglw %x0,%x1,%x2";
3219   else
3220     return "xxmrghw %x0,%x2,%x1";
3221 }
3222   [(set_attr "type" "vecperm")])
3223
3224 ;; Shift left double by word immediate
     ;; Modelled as UNSPEC_VSX_SLDWI over two vector inputs and a constant
     ;; word count (operand 3), which is printed unchanged as the xxsldwi
     ;; immediate.
3225 (define_insn "vsx_xxsldwi_<mode>"
3226   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
3227         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
3228                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
3229                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
3230                       UNSPEC_VSX_SLDWI))]
3231   "VECTOR_MEM_VSX_P (<MODE>mode)"
3232   "xxsldwi %x0,%x1,%x2,%3"
3233   [(set_attr "type" "vecperm")])
3234
3235 \f
3236 ;; Vector reduction insns and splitters
3237
3238 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
     ;; V2DF reduction: the VEC_reduc operation applied to operand 1 and a
     ;; copy of operand 1 with its two elements exchanged.  The output
     ;; template is "#", so code is only produced by the split below, which
     ;; emits two insns (length 8).  Operand 2 is a scratch that may be tied
     ;; to operand 0 (alternatives 1 and 2).
3239   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
3240         (VEC_reduc:V2DF
3241          (vec_concat:V2DF
3242           (vec_select:DF
3243            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3244            (parallel [(const_int 1)]))
3245           (vec_select:DF
3246            (match_dup 1)
3247            (parallel [(const_int 0)])))
3248          (match_dup 1)))
3249    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
3250   "VECTOR_UNIT_VSX_P (V2DFmode)"
3251   "#"
3252   ""
3253   [(const_int 0)]
3254   "
3255 {
       /* Use a fresh pseudo while operand 2 is still a SCRATCH; otherwise
          use the register that was allocated for it.  */
3256   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
3257              ? gen_reg_rtx (V2DFmode)
3258              : operands[2];
       /* Shift operand 1 concatenated with itself left by two words, then
          combine the shifted copy with the original.  */
3259   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
3260   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
3261   DONE;
3262 }"
3263   [(set_attr "length" "8")
3264    (set_attr "type" "veccomplex")])
3265
3266 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
     ;; V4SF reduction, represented with an UNSPEC_REDUC placeholder as the
     ;; first input of the VEC_reduc operation.  Always split ("#") into four
     ;; insns (length 16): shift, combine, shift, combine.  Operands 2 and 3
     ;; are early-clobber scratch registers.
3267   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
3268         (VEC_reduc:V4SF
3269          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3270          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
3271    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3272    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
3273   "VECTOR_UNIT_VSX_P (V4SFmode)"
3274   "#"
3275   ""
3276   [(const_int 0)]
3277   "
3278 {
3279   rtx op0 = operands[0];
3280   rtx op1 = operands[1];
3281   rtx tmp2, tmp3, tmp4;
3282
       /* Before register allocation use three fresh pseudos; afterwards
          fall back to the two allocated scratches, reusing the first one
          for the third temporary.  */
3283   if (can_create_pseudo_p ())
3284     {
3285       tmp2 = gen_reg_rtx (V4SFmode);
3286       tmp3 = gen_reg_rtx (V4SFmode);
3287       tmp4 = gen_reg_rtx (V4SFmode);
3288     }
3289   else
3290     {
3291       tmp2 = operands[2];
3292       tmp3 = operands[3];
3293       tmp4 = tmp2;
3294     }
3295
       /* Step 1: shift by two words and combine with the original.
          Step 2: shift that partial result by three words and combine
          again, writing the final value to operand 0.  */
3296   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
3297   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
3298   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
3299   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
3300   DONE;
3301 }"
3302   [(set_attr "length" "16")
3303    (set_attr "type" "veccomplex")])
3304
3305 ;; Combiner patterns for the vector reduction patterns that know we can get
3306 ;; to the top element of the V2DF array without doing an extract.
3307
3308 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
     ;; Matches the V2DF reduction wrapped in a vec_select of element 1, and
     ;; produces the DFmode result directly.  Always split ("#") into two
     ;; insns (length 8).  Operand 2 is a DFmode scratch that may be tied to
     ;; operand 0.
3309   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
3310         (vec_select:DF
3311          (VEC_reduc:V2DF
3312           (vec_concat:V2DF
3313            (vec_select:DF
3314             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3315             (parallel [(const_int 1)]))
3316            (vec_select:DF
3317             (match_dup 1)
3318             (parallel [(const_int 0)])))
3319           (match_dup 1))
3320          (parallel [(const_int 1)])))
3321    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
3322   "VECTOR_UNIT_VSX_P (V2DFmode)"
3323   "#"
3324   ""
3325   [(const_int 0)]
3326   "
3327 {
       /* One half is taken as the DFmode high part of operand 1 (no insn is
          emitted for it here); the other is extracted as element 1 into the
          scratch, or into a fresh pseudo while operand 2 is still a
          SCRATCH.  */
3328   rtx hi = gen_highpart (DFmode, operands[1]);
3329   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
3330             ? gen_reg_rtx (DFmode)
3331             : operands[2];
3332
3333   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
       /* Scalar DF form of the reduction operation on the two halves.  */
3334   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
3335   DONE;
3336 }"
3337   [(set_attr "length" "8")
3338    (set_attr "type" "veccomplex")])
3339
3340 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
     ;; Matches the V4SF reduction wrapped in a vec_select of element 3, and
     ;; produces the SFmode result in an "f" register.  Always split ("#")
     ;; into five insns (length 20).  Operands 2 and 3 are early-clobber
     ;; scratches; operand 4 is a V4SF scratch tied to operand 0.
3341   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
3342         (vec_select:SF
3343          (VEC_reduc:V4SF
3344           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3345           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
3346          (parallel [(const_int 3)])))
3347    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3348    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
3349    (clobber (match_scratch:V4SF 4 "=0,0"))]
3350   "VECTOR_UNIT_VSX_P (V4SFmode)"
3351   "#"
3352   ""
3353   [(const_int 0)]
3354   "
3355 {
3356   rtx op0 = operands[0];
3357   rtx op1 = operands[1];
3358   rtx tmp2, tmp3, tmp4, tmp5;
3359
       /* Before register allocation use four fresh pseudos; afterwards use
          the allocated scratches, reusing the first one for tmp4.  */
3360   if (can_create_pseudo_p ())
3361     {
3362       tmp2 = gen_reg_rtx (V4SFmode);
3363       tmp3 = gen_reg_rtx (V4SFmode);
3364       tmp4 = gen_reg_rtx (V4SFmode);
3365       tmp5 = gen_reg_rtx (V4SFmode);
3366     }
3367   else
3368     {
3369       tmp2 = operands[2];
3370       tmp3 = operands[3];
3371       tmp4 = tmp2;
3372       tmp5 = operands[4];
3373     }
3374
       /* Same shift/combine sequence as the vector form of the reduction,
          with the vector result left in tmp5 ...  */
3375   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
3376   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
3377   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
3378   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
       /* ... followed by a conversion of that vector into the scalar
          result register.  */
3379   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
3380   DONE;
3381 }"
3382   [(set_attr "length" "20")
3383    (set_attr "type" "veccomplex")])
3384
3385 \f
3386 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
     ;; Matches a load of a short constant into base register operand 0
     ;; immediately followed by a vector load from (operand 0 + operand 3),
     ;; and prints both as one two-instruction sequence (length 8).  Not
     ;; used when TARGET_P9_VECTOR is set.
3387 (define_peephole
3388   [(set (match_operand:P 0 "base_reg_operand" "")
3389         (match_operand:P 1 "short_cint_operand" ""))
3390    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
3391         (mem:VSX_M (plus:P (match_dup 0)
3392                            (match_operand:P 3 "int_reg_operand" ""))))]
3393   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
3394   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
3395   [(set_attr "length" "8")
3396    (set_attr "type" "vecload")])
3397
3398 (define_peephole
     ;; Vector load fusion with the address written as (operand 3 +
     ;; operand 0), i.e. the freshly loaded constant register is the second
     ;; term of the PLUS.  The emitted text still places operand 0 first in
     ;; the indexed load.
3399   [(set (match_operand:P 0 "base_reg_operand" "")
3400         (match_operand:P 1 "short_cint_operand" ""))
3401    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
3402         (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
3403                            (match_dup 0))))]
3404   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
3405   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
3406   [(set_attr "length" "8")
3407    (set_attr "type" "vecload")])
3408
3409 \f
3410 ;; ISA 3.0 vector extend sign support
3411
3412 (define_insn "vsx_sign_extend_qi_<mode>"
     ;; Sign extension with a V16QI source and a VSINT_84 destination,
     ;; modelled as UNSPEC_VSX_SIGN_EXTEND.  The mnemonic suffix comes from
     ;; the <wd> attribute of the destination mode.  Both operands use the
     ;; "v" constraint.
3413   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
3414         (unspec:VSINT_84
3415          [(match_operand:V16QI 1 "vsx_register_operand" "v")]
3416          UNSPEC_VSX_SIGN_EXTEND))]
3417   "TARGET_P9_VECTOR"
3418   "vextsb2<wd> %0,%1"
3419   [(set_attr "type" "vecexts")])
3420
3421 (define_insn "vsx_sign_extend_hi_<mode>"
     ;; Sign extension with a V8HI source and a VSINT_84 destination,
     ;; modelled as UNSPEC_VSX_SIGN_EXTEND.  The mnemonic suffix comes from
     ;; the <wd> attribute of the destination mode.
3422   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
3423         (unspec:VSINT_84
3424          [(match_operand:V8HI 1 "vsx_register_operand" "v")]
3425          UNSPEC_VSX_SIGN_EXTEND))]
3426   "TARGET_P9_VECTOR"
3427   "vextsh2<wd> %0,%1"
3428   [(set_attr "type" "vecexts")])
3429
3430 (define_insn "*vsx_sign_extend_si_v2di"
     ;; Sign extension with a V4SI source and a V2DI destination (vextsw2d),
     ;; using the same UNSPEC_VSX_SIGN_EXTEND as the byte and halfword forms.
     ;; The leading "*" in the name means no gen_ function is created.
3431   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
3432         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
3433                      UNSPEC_VSX_SIGN_EXTEND))]
3434   "TARGET_P9_VECTOR"
3435   "vextsw2d %0,%1"
3436   [(set_attr "type" "vecexts")])
3437
3438 \f
3439 ;; ISA 3.0 Binary Floating-Point Support
3440
3441 ;; VSX Scalar Extract Exponent Double-Precision
     ;; The DFmode source is a VSX register ("wa"); the DImode result goes to
     ;; a general register ("r").  Requires TARGET_P9_VECTOR and
     ;; TARGET_64BIT.
3442 (define_insn "xsxexpdp"
3443   [(set (match_operand:DI 0 "register_operand" "=r")
3444         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
3445          UNSPEC_VSX_SXEXPDP))]
3446   "TARGET_P9_VECTOR && TARGET_64BIT"
3447   "xsxexpdp %0,%x1"
3448   [(set_attr "type" "integer")])
3449
3450 ;; VSX Scalar Extract Significand Double-Precision
     ;; The DFmode source is a VSX register ("wa"); the DImode result goes to
     ;; a general register ("r").  Requires TARGET_P9_VECTOR and
     ;; TARGET_64BIT.
3451 (define_insn "xsxsigdp"
3452   [(set (match_operand:DI 0 "register_operand" "=r")
3453         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
3454          UNSPEC_VSX_SXSIGDP))]
3455   "TARGET_P9_VECTOR && TARGET_64BIT"
3456   "xsxsigdp %0,%x1"
3457   [(set_attr "type" "integer")])
3458
3459 ;; VSX Scalar Insert Exponent Double-Precision
     ;; Both inputs are DImode values in general registers ("r"); the DFmode
     ;; result is written to a VSX register.  Requires TARGET_P9_VECTOR and
     ;; TARGET_64BIT.
3460 (define_insn "xsiexpdp"
3461   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
3462         (unspec:DF [(match_operand:DI 1 "register_operand" "r")
3463                     (match_operand:DI 2 "register_operand" "r")]
3464          UNSPEC_VSX_SIEXPDP))]
3465   "TARGET_P9_VECTOR && TARGET_64BIT"
3466   "xsiexpdp %x0,%1,%2"
3467   [(set_attr "type" "fpsimple")])
3468
3469 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
     ;; Same unspec and output template as xsiexpdp; the only difference is
     ;; that operand 1 is declared in DFmode.  It still carries an "r"
     ;; constraint, so the DFmode value is taken from a general register.
3470 (define_insn "xsiexpdpf"
3471   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
3472         (unspec:DF [(match_operand:DF 1 "register_operand" "r")
3473                     (match_operand:DI 2 "register_operand" "r")]
3474          UNSPEC_VSX_SIEXPDP))]
3475   "TARGET_P9_VECTOR && TARGET_64BIT"
3476   "xsiexpdp %x0,%1,%2"
3477   [(set_attr "type" "fpsimple")])
3478
3479 ;; VSX Scalar Compare Exponents Double-Precision
     ;; One expander per CMP_TEST code.  It emits two sets: the exponent
     ;; compare into a CCFP register (operand 3), then the selected test of
     ;; that register against zero into SImode operand 0.
3480 (define_expand "xscmpexpdp_<code>"
3481   [(set (match_dup 3)
3482         (compare:CCFP
3483          (unspec:DF
3484           [(match_operand:DF 1 "vsx_register_operand" "wa")
3485            (match_operand:DF 2 "vsx_register_operand" "wa")]
3486           UNSPEC_VSX_SCMPEXPDP)
3487          (const_int 0)))
3488    (set (match_operand:SI 0 "register_operand" "=r")
3489         (CMP_TEST:SI (match_dup 3)
3490                      (const_int 0)))]
3491   "TARGET_P9_VECTOR"
3492 {
       /* Operand 3 is not supplied by the caller: allocate a fresh CCFP
          pseudo for the intermediate compare result.  */
3493   operands[3] = gen_reg_rtx (CCFPmode);
3494 })
3495
3496 (define_insn "*xscmpexpdp"
     ;; The compare insn itself: writes a condition register ("y") from the
     ;; UNSPEC_VSX_SCMPEXPDP of two DFmode VSX registers.  Operand 3 must
     ;; satisfy zero_constant and does not appear in the output template.
3497   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
3498         (compare:CCFP
3499          (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
3500                      (match_operand:DF 2 "vsx_register_operand" "wa")]
3501           UNSPEC_VSX_SCMPEXPDP)
3502          (match_operand:SI 3 "zero_constant" "j")))]
3503   "TARGET_P9_VECTOR"
3504   "xscmpexpdp %0,%x1,%x2"
3505   [(set_attr "type" "fpcompare")])
3506
3507 ;; VSX Scalar Test Data Class Double- and Single-Precision
3508 ;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
3509 ;;   if any of the conditions tested by operand 2 are satisfied.
3510 ;;   The gt and unordered bits are cleared to zero.)
     ;; This expander emits the test into a CCFP register (operand 3) and
     ;; then sets SImode operand 0 from the EQ test of that register.
3511 (define_expand "xststdc<Fvsx>"
3512   [(set (match_dup 3)
3513         (compare:CCFP
3514          (unspec:SFDF
3515           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3516            (match_operand:SI 2 "u7bit_cint_operand" "n")]
3517           UNSPEC_VSX_STSTDC)
3518          (match_dup 4)))
3519    (set (match_operand:SI 0 "register_operand" "=r")
3520         (eq:SI (match_dup 3)
3521                (const_int 0)))]
3522   "TARGET_P9_VECTOR"
3523 {
       /* Operands 3 and 4 are created here: a fresh CCFP pseudo for the
          result and the SImode zero used as the second compare operand.  */
3524   operands[3] = gen_reg_rtx (CCFPmode);
3525   operands[4] = CONST0_RTX (SImode);
3526 })
3527
3528 ;; The VSX Scalar Test Data Class Double- and Single-Precision
3529 ;; instruction may also be used to test for negative value.
     ;; The data-class mask is fixed at (const_int 0) and SImode operand 0 is
     ;; set from the LT test of the CCFP result.
3530 (define_expand "xststdcneg<Fvsx>"
3531   [(set (match_dup 2)
3532         (compare:CCFP
3533          (unspec:SFDF
3534           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3535            (const_int 0)]
3536           UNSPEC_VSX_STSTDC)
3537          (match_dup 3)))
3538    (set (match_operand:SI 0 "register_operand" "=r")
3539         (lt:SI (match_dup 2)
3540                (const_int 0)))]
3541   "TARGET_P9_VECTOR"
3542 {
       /* Operands 2 and 3 are created here: a fresh CCFP pseudo for the
          result and the SImode zero used as the second compare operand.  */
3543   operands[2] = gen_reg_rtx (CCFPmode);
3544   operands[3] = CONST0_RTX (SImode);
3545 })
3546
3547 (define_insn "*xststdc<Fvsx>"
     ;; The test-data-class insn itself: writes a condition register ("y")
     ;; from UNSPEC_VSX_STSTDC of a VSX register and a 7-bit constant mask,
     ;; which is printed as the instruction immediate.
     ;; NOTE(review): operand 0 has an empty predicate here, whereas
     ;; *xscmpexpdp uses cc_reg_operand -- confirm whether that is intended.
3548   [(set (match_operand:CCFP 0 "" "=y")
3549         (compare:CCFP
3550          (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3551                        (match_operand:SI 2 "u7bit_cint_operand" "n")]
3552           UNSPEC_VSX_STSTDC)
3553          (match_operand:SI 3 "zero_constant" "j")))]
3554   "TARGET_P9_VECTOR"
3555   "xststdc<Fvsx> %0,%x1,%2"
3556   [(set_attr "type" "fpcompare")])
3557
3558 ;; VSX Vector Extract Exponent Double and Single Precision
     ;; One pattern per VSX_F mode; the result has the same vector mode as
     ;; the input and the mnemonic suffix comes from the <VSs> attribute.
3559 (define_insn "xvxexp<VSs>"
3560   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3561         (unspec:VSX_F
3562          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
3563          UNSPEC_VSX_VXEXP))]
3564   "TARGET_P9_VECTOR"
3565   "xvxexp<VSs> %x0,%x1"
3566   [(set_attr "type" "vecsimple")])
3567
3568 ;; VSX Vector Extract Significand Double and Single Precision
     ;; One pattern per VSX_F mode; the result has the same vector mode as
     ;; the input and the mnemonic suffix comes from the <VSs> attribute.
3569 (define_insn "xvxsig<VSs>"
3570   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3571         (unspec:VSX_F
3572          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
3573          UNSPEC_VSX_VXSIG))]
3574   "TARGET_P9_VECTOR"
3575   "xvxsig<VSs> %x0,%x1"
3576   [(set_attr "type" "vecsimple")])
3577
3578 ;; VSX Vector Insert Exponent Double and Single Precision
     ;; Takes two vector inputs of the same VSX_F mode as the result; all
     ;; three operands are VSX registers ("wa").
3579 (define_insn "xviexp<VSs>"
3580   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3581         (unspec:VSX_F
3582          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
3583           (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
3584          UNSPEC_VSX_VIEXP))]
3585   "TARGET_P9_VECTOR"
3586   "xviexp<VSs> %x0,%x1,%x2"
3587   [(set_attr "type" "vecsimple")])
3588
3589 ;; VSX Vector Test Data Class Double and Single Precision
3590 ;; The corresponding elements of the result vector are all ones
3591 ;; if any of the conditions tested by operand 3 are satisfied.
     ;; In this pattern the condition mask is the 7-bit constant operand 2
     ;; (the pattern has no operand 3).  The result mode is the <VSI>
     ;; attribute of the floating-point input mode.
3592 (define_insn "xvtstdc<VSs>"
3593   [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
3594         (unspec:<VSI>
3595          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
3596           (match_operand:SI 2 "u7bit_cint_operand" "n")]
3597          UNSPEC_VSX_VTSTDC))]
3598   "TARGET_P9_VECTOR"
3599   "xvtstdc<VSs> %x0,%x1,%2"
3600   [(set_attr "type" "vecsimple")])
3601
3602 ;; ISA 3.0 String Operations Support
3603
3604 ;; Compare vectors producing a vector result and a predicate, setting CR6
3605 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
3606 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
3607 ;; need to match v4sf, v2df, or v2di modes because those are expanded
3608 ;; to use Power8 instructions.
     ;; The insn is a two-set parallel: the CR6 predicate (an
     ;; UNSPEC_PREDICATE wrapping the NE compare) and the vector NE result in
     ;; operand 0.  It is emitted as the record form of vcmpne (trailing ".").
3609 (define_insn "*vsx_ne_<mode>_p"
3610   [(set (reg:CC CR6_REGNO)
3611         (unspec:CC
3612          [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3613                  (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
3614          UNSPEC_PREDICATE))
3615    (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
3616         (ne:VSX_EXTRACT_I (match_dup 1)
3617                           (match_dup 2)))]
3618   "TARGET_P9_VECTOR"
3619   "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
3620   [(set_attr "type" "vecsimple")])
3621
3622 (define_insn "*vector_nez_<mode>_p"
     ;; Predicate form of vcmpnez: a two-set parallel that writes CR6 (an
     ;; UNSPEC_PREDICATE wrapping UNSPEC_NEZ_P) and the vector result in
     ;; operand 0.  The pattern iterates over VI while the mnemonic suffix is
     ;; taken from the VSX_EXTRACT_WIDTH attribute.
3623   [(set (reg:CC CR6_REGNO)
3624         (unspec:CC [(unspec:VI
3625                      [(match_operand:VI 1 "gpc_reg_operand" "v")
3626                       (match_operand:VI 2 "gpc_reg_operand" "v")]
3627                      UNSPEC_NEZ_P)]
3628          UNSPEC_PREDICATE))
3629    (set (match_operand:VI 0 "gpc_reg_operand" "=v")
3630         (unspec:VI [(match_dup 1)
3631                     (match_dup 2)]
3632          UNSPEC_NEZ_P))]
3633   "TARGET_P9_VECTOR"
3634   "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
3635   [(set_attr "type" "vecsimple")])
3636
3637 ;; Load VSX Vector with Length
     ;; Operand 1 is the address register and operand 2 the length.  The
     ;; length is first copied into a fresh DImode pseudo (operand 3), and
     ;; that copy is what the UNSPEC_LXVL load uses.  The matching insn
     ;; marks its length operand "+r" and shifts it in place, so the copy
     ;; keeps the caller's operand 2 unmodified.
3638 (define_expand "lxvl"
3639   [(set (match_dup 3)
3640         (match_operand:DI 2 "register_operand"))
3641    (set (match_operand:V16QI 0 "vsx_register_operand")
3642         (unspec:V16QI
3643          [(match_operand:DI 1 "gpc_reg_operand")
3644           (match_dup 3)]
3645          UNSPEC_LXVL))]
3646   "TARGET_P9_VECTOR && TARGET_64BIT"
3647 {
3648   operands[3] = gen_reg_rtx (DImode);
3649 })
3650
3651 (define_insn "*lxvl"
     ;; Emits two instructions (length 8): sldi shifts the length register
     ;; left by 56 bits in place -- which is why operand 2 is read-write
     ;; ("+r") -- and lxvl then performs the load using the "b"-constrained
     ;; address register.
3652   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3653         (unspec:V16QI
3654          [(match_operand:DI 1 "gpc_reg_operand" "b")
3655           (match_operand:DI 2 "register_operand" "+r")]
3656          UNSPEC_LXVL))]
3657   "TARGET_P9_VECTOR && TARGET_64BIT"
3658   "sldi %2,%2, 56\; lxvl %x0,%1,%2"
3659   [(set_attr "length" "8")
3660    (set_attr "type" "vecload")])
3661
3662 ;; Store VSX Vector with Length
     ;; Operand 0 is the vector to store, operand 1 the address register and
     ;; operand 2 the length.  The length is first copied into a fresh DImode
     ;; pseudo (operand 3), and that copy is what the UNSPEC_STXVL store
     ;; uses.
3663 (define_expand "stxvl"
3664   [(set (match_dup 3)
3665         (match_operand:DI 2 "register_operand"))
3666    (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
3667         (unspec:V16QI
3668          [(match_operand:V16QI 0 "vsx_register_operand")
3669           (match_dup 3)]
3670          UNSPEC_STXVL))]
3671   "TARGET_P9_VECTOR && TARGET_64BIT"
3672 {
3673   operands[3] = gen_reg_rtx (DImode);
3674 })
3675
3676 (define_insn "*stxvl"
     ;; Emits two instructions (length 8): sldi shifts the length register
     ;; left by 56 bits in place -- which is why operand 2 is read-write
     ;; ("+r") -- and stxvl then performs the store using the
     ;; "b"-constrained address register.  The shift count matches the one
     ;; used by the *lxvl pattern; sldi needs all three operands.
3677   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
3678         (unspec:V16QI
3679          [(match_operand:V16QI 0 "vsx_register_operand" "wa")
3680           (match_operand:DI 2 "register_operand" "+r")]
3681          UNSPEC_STXVL))]
3682   "TARGET_P9_VECTOR && TARGET_64BIT"
3683   "sldi %2,%2,56\;stxvl %x0,%1,%2"
3684   [(set_attr "length" "8")
3685    (set_attr "type" "vecstore")])
3686
3687 ;; Vector Compare Not Equal Byte
     ;; V16QI result and inputs, all in Altivec ("v") registers; modelled as
     ;; UNSPEC_VCMPNEB.  This is the non-record form (CR6 is not set).
3688 (define_insn "vcmpneb"
3689   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
3690         (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
3691                        (match_operand:V16QI 2 "altivec_register_operand" "v")]
3692          UNSPEC_VCMPNEB))]
3693   "TARGET_P9_VECTOR"
3694   "vcmpneb %0,%1,%2"
3695   [(set_attr "type" "vecsimple")])
3696
3697 ;; Vector Compare Not Equal or Zero Byte
     ;; V16QI result and inputs, all in Altivec ("v") registers; modelled as
     ;; UNSPEC_VCMPNEZB.
3698 (define_insn "vcmpnezb"
3699   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
3700         (unspec:V16QI
3701          [(match_operand:V16QI 1 "altivec_register_operand" "v")
3702           (match_operand:V16QI 2 "altivec_register_operand" "v")]
3703          UNSPEC_VCMPNEZB))]
3704   "TARGET_P9_VECTOR"
3705   "vcmpnezb %0,%1,%2"
3706   [(set_attr "type" "vecsimple")])
3707
3708 ;; Vector Compare Not Equal Half Word
     ;; V8HI result and inputs, all in Altivec ("v") registers; modelled as
     ;; UNSPEC_VCMPNEH.
3709 (define_insn "vcmpneh"
3710   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
3711         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
3712                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
3713          UNSPEC_VCMPNEH))]
3714   "TARGET_P9_VECTOR"
3715   "vcmpneh %0,%1,%2"
3716   [(set_attr "type" "vecsimple")])
3717
3718 ;; Vector Compare Not Equal or Zero Half Word
     ;; V8HI result and inputs, all in Altivec ("v") registers; modelled as
     ;; UNSPEC_VCMPNEZH.
3719 (define_insn "vcmpnezh"
3720   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
3721         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
3722                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
3723          UNSPEC_VCMPNEZH))]
3724   "TARGET_P9_VECTOR"
3725   "vcmpnezh %0,%1,%2"
3726   [(set_attr "type" "vecsimple")])
3727
3728 ;; Vector Compare Not Equal Word
     ;; V4SI result and inputs, all in Altivec ("v") registers.
     ;; NOTE(review): this pattern is tagged UNSPEC_VCMPNEH, the same unspec
     ;; as the halfword compare.  The two patterns still differ by mode (V4SI
     ;; here, V8HI there) -- confirm whether a word-specific unspec was
     ;; intended.
3729 (define_insn "vcmpnew"
3730   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
3731         (unspec:V4SI
3732          [(match_operand:V4SI 1 "altivec_register_operand" "v")
3733           (match_operand:V4SI 2 "altivec_register_operand" "v")]
3734          UNSPEC_VCMPNEH))]
3735   "TARGET_P9_VECTOR"
3736   "vcmpnew %0,%1,%2"
3737   [(set_attr "type" "vecsimple")])
3738
3739 ;; Vector Compare Not Equal or Zero Word
     ;; V4SI result and inputs, all in Altivec ("v") registers; modelled as
     ;; UNSPEC_VCMPNEZW.
3740 (define_insn "vcmpnezw"
3741   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
3742         (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
3743                       (match_operand:V4SI 2 "altivec_register_operand" "v")]
3744          UNSPEC_VCMPNEZW))]
3745   "TARGET_P9_VECTOR"
3746   "vcmpnezw %0,%1,%2"
3747   [(set_attr "type" "vecsimple")])
3748
3749 ;; Vector Count Leading Zero Least-Significant Bits Byte
     ;; Takes a V16QI Altivec ("v") register and produces an SImode count in
     ;; a general register ("r").
3750 (define_insn "vclzlsbb"
3751   [(set (match_operand:SI 0 "register_operand" "=r")
3752         (unspec:SI
3753          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
3754          UNSPEC_VCLZLSBB))]
3755   "TARGET_P9_VECTOR"
3756   "vclzlsbb %0,%1"
3757   [(set_attr "type" "vecsimple")])
3758
3759 ;; Vector Count Trailing Zero Least-Significant Bits Byte
     ;; Takes a V16QI Altivec ("v") register and produces an SImode count in
     ;; a general register ("r").
3760 (define_insn "vctzlsbb"
3761   [(set (match_operand:SI 0 "register_operand" "=r")
3762         (unspec:SI
3763          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
3764          UNSPEC_VCTZLSBB))]
3765   "TARGET_P9_VECTOR"
3766   "vctzlsbb %0,%1"
3767   [(set_attr "type" "vecsimple")])
3768
3769 ;; Vector Extract Unsigned Byte Left-Indexed
     ;; Operand 1 is the SImode index in a general register, operand 2 the
     ;; V16QI source in an Altivec ("v") register; the SImode result goes to
     ;; a general register.
3770 (define_insn "vextublx"
3771   [(set (match_operand:SI 0 "register_operand" "=r")
3772         (unspec:SI
3773          [(match_operand:SI 1 "register_operand" "r")
3774           (match_operand:V16QI 2 "altivec_register_operand" "v")]
3775          UNSPEC_VEXTUBLX))]
3776   "TARGET_P9_VECTOR"
3777   "vextublx %0,%1,%2"
3778   [(set_attr "type" "vecsimple")])
3779
3780 ;; Vector Extract Unsigned Byte Right-Indexed
     ;; Operand 1 is the SImode index in a general register, operand 2 the
     ;; V16QI source in an Altivec ("v") register; the SImode result goes to
     ;; a general register.
3781 (define_insn "vextubrx"
3782   [(set (match_operand:SI 0 "register_operand" "=r")
3783         (unspec:SI
3784          [(match_operand:SI 1 "register_operand" "r")
3785           (match_operand:V16QI 2 "altivec_register_operand" "v")]
3786          UNSPEC_VEXTUBRX))]
3787   "TARGET_P9_VECTOR"
3788   "vextubrx %0,%1,%2"
3789   [(set_attr "type" "vecsimple")])
3790
3791 ;; Vector Extract Unsigned Half Word Left-Indexed
     ;; Operand 1 is the SImode index in a general register, operand 2 the
     ;; V8HI source in an Altivec ("v") register; the SImode result goes to
     ;; a general register.
3792 (define_insn "vextuhlx"
3793   [(set (match_operand:SI 0 "register_operand" "=r")
3794         (unspec:SI
3795          [(match_operand:SI 1 "register_operand" "r")
3796           (match_operand:V8HI 2 "altivec_register_operand" "v")]
3797          UNSPEC_VEXTUHLX))]
3798   "TARGET_P9_VECTOR"
3799   "vextuhlx %0,%1,%2"
3800   [(set_attr "type" "vecsimple")])
3801
3802 ;; Vector Extract Unsigned Half Word Right-Indexed
     ;; Operand 1 is the SImode index in a general register, operand 2 the
     ;; V8HI source in an Altivec ("v") register; the SImode result goes to
     ;; a general register.
3803 (define_insn "vextuhrx"
3804   [(set (match_operand:SI 0 "register_operand" "=r")
3805         (unspec:SI
3806          [(match_operand:SI 1 "register_operand" "r")
3807           (match_operand:V8HI 2 "altivec_register_operand" "v")]
3808          UNSPEC_VEXTUHRX))]
3809   "TARGET_P9_VECTOR"
3810   "vextuhrx %0,%1,%2"
3811   [(set_attr "type" "vecsimple")])
3812
3813 ;; Vector Extract Unsigned Word Left-Indexed
     ;; Operand 1 is the SImode index in a general register, operand 2 the
     ;; V4SI source in an Altivec ("v") register; the SImode result goes to
     ;; a general register.  Also used by the *vextract4b_internal split.
3814 (define_insn "vextuwlx"
3815   [(set (match_operand:SI 0 "register_operand" "=r")
3816         (unspec:SI
3817          [(match_operand:SI 1 "register_operand" "r")
3818           (match_operand:V4SI 2 "altivec_register_operand" "v")]
3819          UNSPEC_VEXTUWLX))]
3820   "TARGET_P9_VECTOR"
3821   "vextuwlx %0,%1,%2"
3822   [(set_attr "type" "vecsimple")])
3823
3824 ;; Vector Extract Unsigned Word Right-Indexed
     ;; Operand 1 is the SImode index in a general register, operand 2 the
     ;; V4SI source in an Altivec ("v") register; the SImode result goes to
     ;; a general register.  Also used by the *vextract4b_internal split.
3825 (define_insn "vextuwrx"
3826   [(set (match_operand:SI 0 "register_operand" "=r")
3827         (unspec:SI
3828          [(match_operand:SI 1 "register_operand" "r")
3829           (match_operand:V4SI 2 "altivec_register_operand" "v")]
3830          UNSPEC_VEXTUWRX))]
3831   "TARGET_P9_VECTOR"
3832   "vextuwrx %0,%1,%2"
3833   [(set_attr "type" "vecsimple")])
3834
3835 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
3836 ;; endian version needs to adjust the byte number, and the V4SI element in
3837 ;; vinsert4b.
     ;; vextract4b: operand 2 is a constant byte offset in the range 0..12;
     ;; the extracted word is produced as a DImode value in operand 0.
3838 (define_expand "vextract4b"
3839   [(set (match_operand:DI 0 "gpc_reg_operand")
3840         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
3841                     (match_operand:QI 2 "const_0_to_12_operand")]
3842                    UNSPEC_XXEXTRACTUW))]
3843   "TARGET_P9_VECTOR"
3844 {
       /* Mirror the byte offset (12 - n) when the vector element order is
          not big-endian.  */
3845   if (!VECTOR_ELT_ORDER_BIG)
3846     operands[2] = GEN_INT (12 - INTVAL (operands[2]));
3847 })
3848
3849 (define_insn_and_split "*vextract4b_internal"
     ;; Two alternatives.  With a "wj" destination the insn is a single
     ;; xxextractuw.  With a general-register destination the template is
     ;; "#" and the insn is split after reload (only when operand 0 is an
     ;; integer register) into a load of the byte offset followed by an
     ;; indexed word extract.
3850   [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r")
3851         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v")
3852                     (match_operand:QI 2 "const_0_to_12_operand" "n,n")]
3853                    UNSPEC_XXEXTRACTUW))]
3854   "TARGET_P9_VECTOR"
3855   "@
3856    xxextractuw %x0,%x1,%2
3857    #"
3858   "&& reload_completed && int_reg_operand (operands[0], DImode)"
3859   [(const_int 0)]
3860 {
3861   rtx op0 = operands[0];
3862   rtx op1 = operands[1];
3863   rtx op2 = operands[2];
       /* Re-express the hard registers in the modes the extract patterns
          expect: SImode for the destination, V4SImode for the source.  */
3864   rtx op0_si = gen_rtx_REG (SImode, REGNO (op0));
3865   rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1));
3866
       /* The destination register doubles as the index register: load the
          constant byte offset into it, then extract with the left- or
          right-indexed form according to the vector element order.  */
3867   emit_move_insn (op0, op2);
3868   if (VECTOR_ELT_ORDER_BIG)
3869     emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si));
3870   else
3871     emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si));
3872   DONE;
3873 }
3874   [(set_attr "type" "vecperm")])
3875
3876 (define_expand "vinsert4b"
     ;; Inserts a word taken from V4SI operand 1 into V16QI operand 2 at the
     ;; constant byte offset given by operand 3 (0..12), producing operand 0.
3877   [(set (match_operand:V16QI 0 "vsx_register_operand")
3878         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
3879                        (match_operand:V16QI 2 "vsx_register_operand")
3880                        (match_operand:QI 3 "const_0_to_12_operand")]
3881                    UNSPEC_XXINSERTW))]
3882   "TARGET_P9_VECTOR"
3883 {
       /* When the element order is not big-endian, two adjustments are made:
          operand 1 is replaced by a temporary produced by
          vsx_xxpermdi_v4si_be from operand 1 with itself (selector 1), and
          the byte offset is mirrored (12 - n).  */
3884   if (!VECTOR_ELT_ORDER_BIG)
3885     {
3886       rtx op1 = operands[1];
3887       rtx v4si_tmp = gen_reg_rtx (V4SImode);
3888       emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
3889       operands[1] = v4si_tmp;
3890       operands[3] = GEN_INT (12 - INTVAL (operands[3]));
3891     }
3892 })
3893
3894 (define_insn "*vinsert4b_internal"  ;; Matches the RTL generated by vinsert4b
3895   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3896         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
3897                        (match_operand:V16QI 2 "vsx_register_operand" "0")  ;; tied to operand 0
3898                        (match_operand:QI 3 "const_0_to_12_operand" "n")]   ;; constant byte number
3899                    UNSPEC_XXINSERTW))]
3900   "TARGET_P9_VECTOR"
3901   "xxinsertw %x0,%x1,%3"
3902   [(set_attr "type" "vecperm")])
3903
3904 (define_expand "vinsert4b_di"  ;; Like vinsert4b, but the source operand 1 is DImode
3905   [(set (match_operand:V16QI 0 "vsx_register_operand")
3906         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand")
3907                        (match_operand:V16QI 2 "vsx_register_operand")
3908                        (match_operand:QI 3 "const_0_to_12_operand")]
3909                    UNSPEC_XXINSERTW))]
3910   "TARGET_P9_VECTOR"
3911 {
3912   if (!VECTOR_ELT_ORDER_BIG)  /* Not big-endian element order: mirror the byte number.  */
3913     operands[3] = GEN_INT (12 - INTVAL (operands[3]));
3914 })
3915
3916 (define_insn "*vinsert4b_di_internal"  ;; Matches the RTL generated by vinsert4b_di
3917   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3918         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj")
3919                        (match_operand:V16QI 2 "vsx_register_operand" "0")  ;; tied to operand 0
3920                        (match_operand:QI 3 "const_0_to_12_operand" "n")]   ;; constant byte number
3921                    UNSPEC_XXINSERTW))]
3922   "TARGET_P9_VECTOR"
3923   "xxinsertw %x0,%x1,%3"
3924   [(set_attr "type" "vecperm")])
3925
3926 \f
3927 ;; Support for ISA 3.0 vector byte reverse
3928
3929 ;; Swap all bytes within a vector
3930 (define_insn "p9_xxbrq_v1ti"  ;; bswap of the single V1TI element
3931   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
3932         (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
3933   "TARGET_P9_VECTOR"
3934   "xxbrq %x0,%x1"
3935   [(set_attr "type" "vecperm")])
3936
3937 (define_expand "p9_xxbrq_v16qi"  ;; V16QI byte reverse, emitted through p9_xxbrq_v1ti
3938   [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))  ;; result
3939    (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]  ;; source, read only
3940   "TARGET_P9_VECTOR"
3941 {
3942   rtx op0 = gen_lowpart (V1TImode, operands[0]);  /* View both operands as V1TImode.  */
3943   rtx op1 = gen_lowpart (V1TImode, operands[1]);
3944   emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
3945   DONE;
3946 })
3947
3948 ;; Swap all bytes in each 64-bit element
3949 (define_insn "p9_xxbrd_<mode>"  ;; <mode> iterates over VSX_D (V2DF, V2DI)
3950   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3951         (bswap:VSX_D (match_operand:VSX_D 1 "vsx_register_operand" "wa")))]
3952   "TARGET_P9_VECTOR"
3953   "xxbrd %x0,%x1"
3954   [(set_attr "type" "vecperm")])
3955
3956 ;; Swap all bytes in each 32-bit element
3957 (define_insn "p9_xxbrw_<mode>"  ;; <mode> iterates over VSX_W (V4SF, V4SI)
3958   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3959         (bswap:VSX_W (match_operand:VSX_W 1 "vsx_register_operand" "wa")))]
3960   "TARGET_P9_VECTOR"
3961   "xxbrw %x0,%x1"
3962   [(set_attr "type" "vecperm")])
3963
3964 ;; Swap all bytes in each 16-bit element
3965 (define_insn "p9_xxbrh_v8hi"  ;; V8HI only; no mode iterator is used here
3966   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3967         (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
3968   "TARGET_P9_VECTOR"
3969   "xxbrh %x0,%x1"
3970   [(set_attr "type" "vecperm")])
3971 \f
3972
3973 ;; Operand numbers for the following peephole2
3974 (define_constants
3975   [(SFBOOL_TMP_GPR               0)             ;; GPR temporary
3976    (SFBOOL_TMP_VSX               1)             ;; vector temporary
3977    (SFBOOL_MFVSR_D               2)             ;; move to gpr dest
3978    (SFBOOL_MFVSR_A               3)             ;; move to gpr src
3979    (SFBOOL_BOOL_D                4)             ;; and/ior/xor dest
3980    (SFBOOL_BOOL_A1               5)             ;; and/ior/xor arg1
3981    (SFBOOL_BOOL_A2               6)             ;; and/ior/xor arg2
3982    (SFBOOL_SHL_D                 7)             ;; shift left dest
3983    (SFBOOL_SHL_A                 8)             ;; shift left arg
3984    (SFBOOL_MTVSR_D               9)             ;; move to vector dest
3985    (SFBOOL_BOOL_A_DI            10)             ;; SFBOOL_BOOL_A1/A2 as DImode
3986    (SFBOOL_TMP_VSX_DI           11)             ;; SFBOOL_TMP_VSX as DImode
3987    (SFBOOL_MTVSR_D_V4SF         12)])           ;; SFBOOL_MTVSR_D as V4SFmode
3988
3989 ;; Attempt to optimize some common GLIBC operations using logical operations to
3990 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
3991 ;; after macro expansion that looks like:
3992 ;;
3993 ;;      typedef union {
3994 ;;        float value;
3995 ;;        uint32_t word;
3996 ;;      } ieee_float_shape_type;
3997 ;;
3998 ;;      float t1;
3999 ;;      int32_t is;
4000 ;;
4001 ;;      do {
4002 ;;        ieee_float_shape_type gf_u;
4003 ;;        gf_u.value = (t1);
4004 ;;        (is) = gf_u.word;
4005 ;;      } while (0);
4006 ;;
4007 ;;      do {
4008 ;;        ieee_float_shape_type sf_u;
4009 ;;        sf_u.word = (is & 0xfffff000);
4010 ;;        (t1) = sf_u.value;
4011 ;;      } while (0);
4012 ;;
4013 ;;
4014 ;; This would result in two direct move operations (convert to memory format,
4015 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
4016 ;; scalar format).  With this peephole, we eliminate the direct move to the
4017 ;; GPR, and instead move the integer mask value to the vector register after a
4018 ;; shift and do the VSX logical operation.
4019
4020 ;; The insns for dealing with SFmode in GPR registers look like:
4021 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
4022 ;;
4023 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
4024 ;;
4025 ;; (set (reg:DI reg3) (lshiftrt:DI (reg:DI reg3) (const_int 32)))
4026 ;;
4027 ;; (set (reg:DI reg5) (and:DI (reg:DI reg3) (reg:DI reg4)))
4028 ;;
4029 ;; (set (reg:DI reg6) (ashift:DI (reg:DI reg5) (const_int 32)))
4030 ;;
4031 ;; (set (reg:SF reg7) (unspec:SF [(reg:DI reg6)] UNSPEC_P8V_MTVSRD))
4032 ;;
4033 ;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg7)] UNSPEC_VSX_CVSPDPN))
4034
4035 (define_peephole2  ;; Do an SFmode and/ior/xor in a vector register instead of a GPR
4036   [(match_scratch:DI SFBOOL_TMP_GPR "r")  ;; scratch GPR for the shifted argument
4037    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")  ;; scratch vector register
4038
4039    ;; MFVSRD
4040    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
4041         (unspec:DI [(match_operand:V4SF SFBOOL_MFVSR_A "vsx_register_operand")]
4042                    UNSPEC_P8V_RELOAD_FROM_VSX))
4043
4044    ;; SRDI
4045    (set (match_dup SFBOOL_MFVSR_D)
4046         (lshiftrt:DI (match_dup SFBOOL_MFVSR_D)
4047                      (const_int 32)))
4048
4049    ;; AND/IOR/XOR operation on int
4050    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
4051         (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
4052                         (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
4053
4054    ;; SLDI
4055    (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
4056         (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
4057                    (const_int 32)))
4058
4059    ;; MTVSRD
4060    (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
4061         (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
4062
4063   "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
4064    /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
4065       to compare registers, when the mode is different.  */
4066    && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
4067    && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
4068    && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
4069    && (REG_P (operands[SFBOOL_BOOL_A2])
4070        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
4071    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
4072        || peep2_reg_dead_p (3, operands[SFBOOL_MFVSR_D]))
4073    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
4074        || (REG_P (operands[SFBOOL_BOOL_A2])
4075            && REGNO (operands[SFBOOL_MFVSR_D])
4076                 == REGNO (operands[SFBOOL_BOOL_A2])))
4077    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
4078    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
4079        || peep2_reg_dead_p (4, operands[SFBOOL_BOOL_D]))
4080    && peep2_reg_dead_p (5, operands[SFBOOL_SHL_D])"
4081   [(set (match_dup SFBOOL_TMP_GPR)  ;; replacement 1: shift the other argument left 32
4082         (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
4083                    (const_int 32)))
4084
4085    (set (match_dup SFBOOL_TMP_VSX_DI)  ;; replacement 2: copy it to the vector scratch
4086         (match_dup SFBOOL_TMP_GPR))
4087
4088    (set (match_dup SFBOOL_MTVSR_D_V4SF)  ;; replacement 3: the logical op as V4SF
4089         (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A)
4090                           (match_dup SFBOOL_TMP_VSX)))]
4091 {
4092   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
4093   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
4094   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
4095   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
4096   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
4097
4098   if (CONST_INT_P (bool_a2))  /* Constant argument: load it into the GPR scratch.  */
4099     {
4100       rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
4101       emit_move_insn (tmp_gpr, bool_a2);
4102       operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
4103     }
4104   else  /* Register argument: use whichever of A1/A2 is not the MFVSR dest.  */
4105     {
4106       int regno_bool_a1 = REGNO (bool_a1);
4107       int regno_bool_a2 = REGNO (bool_a2);
4108       int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
4109                           ? regno_bool_a2 : regno_bool_a1);
4110       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
4111     }
4112
4113   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);  /* Same hard reg, DImode.  */
4114   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);  /* Same hard reg, V4SFmode.  */
4115 })