gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2017 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Iterator for comparison types
  22 (define_code_iterator CMP_TEST [eq lt gt unordered])
  23
  24 ;; Mode attribute for vector floate and floato conversions
  25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
  26
  27 ;; Iterator for both scalar and vector floating point types supported by VSX
  28 (define_mode_iterator VSX_B [DF V4SF V2DF])
  29
  30 ;; Iterator for the 2 64-bit vector types
  31 (define_mode_iterator VSX_D [V2DF V2DI])
  32
  33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
  34 ;; types that goes in a single vector register.
  35 (define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
  36                                   (TF   "FLOAT128_VECTOR_P (TFmode)")
  37                                   TI
  38                                   V1TI])
  39
  40 ;; Iterator for 128-bit integer types that go in a single vector register.
  41 (define_mode_iterator VSX_TI [TI V1TI])
  42
  43 ;; Iterator for the 2 32-bit vector types
  44 (define_mode_iterator VSX_W [V4SF V4SI])
  45
  46 ;; Iterator for the DF types
  47 (define_mode_iterator VSX_DF [V2DF DF])
  48
  49 ;; Iterator for vector floating point types supported by VSX
  50 (define_mode_iterator VSX_F [V4SF V2DF])
  51
  52 ;; Iterator for logical types supported by VSX
  53 (define_mode_iterator VSX_L [V16QI
  54                              V8HI
  55                              V4SI
  56                              V2DI
  57                              V4SF
  58                              V2DF
  59                              V1TI
  60                              TI
  61                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  62                              (TF        "FLOAT128_VECTOR_P (TFmode)")])
  63
  64 ;; Iterator for memory moves.
  65 (define_mode_iterator VSX_M [V16QI
  66                              V8HI
  67                              V4SI
  68                              V2DI
  69                              V4SF
  70                              V2DF
  71                              V1TI
  72                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  73                              (TF        "FLOAT128_VECTOR_P (TFmode)")
  74                              TI])
  75
  76 ;; Map into the appropriate load/store name based on the type
  77 (define_mode_attr VSm  [(V16QI "vw4")
  78                         (V8HI  "vw4")
  79                         (V4SI  "vw4")
  80                         (V4SF  "vw4")
  81                         (V2DF  "vd2")
  82                         (V2DI  "vd2")
  83                         (DF    "d")
  84                         (TF    "vd2")
  85                         (KF    "vd2")
  86                         (V1TI  "vd2")
  87                         (TI    "vd2")])
  88
  89 ;; Map into the appropriate suffix based on the type
  90 (define_mode_attr VSs   [(V16QI "sp")
  91                          (V8HI  "sp")
  92                          (V4SI  "sp")
  93                          (V4SF  "sp")
  94                          (V2DF  "dp")
  95                          (V2DI  "dp")
  96                          (DF    "dp")
  97                          (SF    "sp")
  98                          (TF    "dp")
  99                          (KF    "dp")
 100                          (V1TI  "dp")
 101                          (TI    "dp")])
 102
 103 ;; Map the register class used
 104 (define_mode_attr VSr   [(V16QI "v")
 105                          (V8HI  "v")
 106                          (V4SI  "v")
 107                          (V4SF  "wf")
 108                          (V2DI  "wd")
 109                          (V2DF  "wd")
 110                          (DI    "wi")
 111                          (DF    "ws")
 112                          (SF    "ww")
 113                          (TF    "wp")
 114                          (KF    "wq")
 115                          (V1TI  "v")
 116                          (TI    "wt")])
 117
 118 ;; Map the register class used for float<->int conversions (floating point side)
 119 ;; VSr2 is the preferred register class, VSr3 is any register class that will
 120 ;; hold the data
 121 (define_mode_attr VSr2  [(V2DF  "wd")
 122                          (V4SF  "wf")
 123                          (DF    "ws")
 124                          (SF    "ww")
 125                          (DI    "wi")
 126                          (KF    "wq")
 127                          (TF    "wp")])
 128
 129 (define_mode_attr VSr3  [(V2DF  "wa")
 130                          (V4SF  "wa")
 131                          (DF    "ws")
 132                          (SF    "ww")
 133                          (DI    "wi")
 134                          (KF    "wq")
 135                          (TF    "wp")])
 136
 137 ;; Map the register class for sp<->dp float conversions, destination
 138 (define_mode_attr VSr4  [(SF    "ws")
 139                          (DF    "f")
 140                          (V2DF  "wd")
 141                          (V4SF  "v")])
 142
 143 ;; Map the register class for sp<->dp float conversions, source
 144 (define_mode_attr VSr5  [(SF    "ws")
 145                          (DF    "f")
 146                          (V2DF  "v")
 147                          (V4SF  "wd")])
 148
 149 ;; The VSX register class that a type can occupy, even if it is not the
 150 ;; preferred register class (VSr is the preferred register class that will get
 151 ;; allocated first).
 152 (define_mode_attr VSa   [(V16QI "wa")
 153                          (V8HI  "wa")
 154                          (V4SI  "wa")
 155                          (V4SF  "wa")
 156                          (V2DI  "wa")
 157                          (V2DF  "wa")
 158                          (DI    "wi")
 159                          (DF    "ws")
 160                          (SF    "ww")
 161                          (V1TI  "wa")
 162                          (TI    "wt")
 163                          (TF    "wp")
 164                          (KF    "wq")])
 165
 166 ;; Same size integer type for floating point data
 167 (define_mode_attr VSi [(V4SF  "v4si")
 168                        (V2DF  "v2di")
 169                        (DF    "di")])
 170
 171 (define_mode_attr VSI [(V4SF  "V4SI")
 172                        (V2DF  "V2DI")
 173                        (DF    "DI")])
 174
 175 ;; Word size for same size conversion
 176 (define_mode_attr VSc [(V4SF "w")
 177                        (V2DF "d")
 178                        (DF   "d")])
 179
 180 ;; Map into either s or v, depending on whether this is a scalar or vector
 181 ;; operation
 182 (define_mode_attr VSv   [(V16QI "v")
 183                          (V8HI  "v")
 184                          (V4SI  "v")
 185                          (V4SF  "v")
 186                          (V2DI  "v")
 187                          (V2DF  "v")
 188                          (V1TI  "v")
 189                          (DF    "s")
 190                          (KF    "v")])
 191
 192 ;; Appropriate type for add ops (and other simple FP ops)
 193 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 194                                  (V4SF "vecfloat")
 195                                  (DF   "fp")])
 196
 197 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 198                                    (V4SF "fp_addsub_s")
 199                                    (DF   "fp_addsub_d")])
 200
 201 ;; Appropriate type for multiply ops
 202 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 203                                  (V4SF "vecfloat")
 204                                  (DF   "dmul")])
 205
 206 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 207                                  (V4SF "fp_mul_s")
 208                                  (DF   "fp_mul_d")])
 209
 210 ;; Appropriate type for divide ops.
 211 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 212                                  (V4SF "vecfdiv")
 213                                  (DF   "ddiv")])
 214
 215 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 216                                  (V4SF "fp_div_s")
 217                                  (DF   "fp_div_d")])
 218
 219 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 220 ;; the scalar sqrt
 221 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 222                                  (V4SF "ssqrt")
 223                                  (DF   "dsqrt")])
 224
 225 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 226                                  (V4SF "fp_sqrt_s")
 227                                  (DF   "fp_sqrt_d")])
 228
 229 ;; Iterator and modes for sp<->dp conversions
 230 ;; Because scalar SF values are represented internally as double, use the
 231 ;; V4SF type to represent this than SF.
 232 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 233
 234 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 235                                (V4SF    "V2DF")
 236                                (V2DF    "V4SF")])
 237
 238 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 239                                 (V4SF   "xvcvspdp")
 240                                 (V2DF   "xvcvdpsp")])
 241
 242 (define_mode_attr VS_spdp_type [(DF     "fp")
 243                                 (V4SF   "vecdouble")
 244                                 (V2DF   "vecdouble")])
 245
 246 ;; Map the scalar mode for a vector type
 247 (define_mode_attr VS_scalar [(V1TI      "TI")
 248                              (V2DF      "DF")
 249                              (V2DI      "DI")
 250                              (V4SF      "SF")
 251                              (V4SI      "SI")
 252                              (V8HI      "HI")
 253                              (V16QI     "QI")])
 254
 255 ;; Map to a double-sized vector mode
 256 (define_mode_attr VS_double [(V4SI      "V8SI")
 257                              (V4SF      "V8SF")
 258                              (V2DI      "V4DI")
 259                              (V2DF      "V4DF")
 260                              (V1TI      "V2TI")])
 261
 262 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 263 ;; to/from gprs
 264 (define_mode_attr VS_64dm [(V2DF        "wk")
 265                            (V2DI        "wj")])
 266
 267 ;; Map register class for 64-bit element in 128-bit vector for normal register
 268 ;; to register moves
 269 (define_mode_attr VS_64reg [(V2DF       "ws")
 270                             (V2DI       "wi")])
 271
 272 ;; Iterators for loading constants with xxspltib
 273 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 274 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 275
 276 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
 277 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
 278 ;; done on ISA 2.07 and not just ISA 3.0.
 279 (define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
 280 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
 281
 282 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
 283                                      (V8HI "h")
 284                                      (V4SI "w")])
 285
 286 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
 287 ;; insert to validate the operand number.
 288 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
 289                                          (V8HI  "const_0_to_7_operand")
 290                                          (V4SI  "const_0_to_3_operand")])
 291
 292 ;; Mode attribute to give the constraint for vector extract and insert
 293 ;; operations.
 294 (define_mode_attr VSX_EX [(V16QI "v")
 295                           (V8HI  "v")
 296                           (V4SI  "wa")])
 297
 298 ;; Mode iterator for binary floating types other than double to
 299 ;; optimize convert to that floating point type from an extract
 300 ;; of an integer type
 301 (define_mode_iterator VSX_EXTRACT_FL [SF
 302                                       (IF "FLOAT128_2REG_P (IFmode)")
 303                                       (KF "TARGET_FLOAT128_HW")
 304                                       (TF "FLOAT128_2REG_P (TFmode)
 305                                            || (FLOAT128_IEEE_P (TFmode)
 306                                                && TARGET_FLOAT128_HW)")])
 307
 308 ;; Mode iterator for binary floating types that have a direct conversion
 309 ;; from 64-bit integer to floating point
 310 (define_mode_iterator FL_CONV [SF
 311                                DF
 312                                (KF "TARGET_FLOAT128_HW")
 313                                (TF "TARGET_FLOAT128_HW
 314                                     && FLOAT128_IEEE_P (TFmode)")])
 315
 316 ;; Iterator for the 2 short vector types to do a splat from an integer
 317 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 318
 319 ;; Mode attribute to give the count for the splat instruction to splat
 320 ;; the value in the 64-bit integer slot
 321 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
 322
 323 ;; Mode attribute to give the suffix for the splat instruction
 324 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
 325
 326 ;; Constants for creating unspecs
 327 (define_c_enum "unspec"
 328   [UNSPEC_VSX_CONCAT
 329    UNSPEC_VSX_CVDPSXWS
 330    UNSPEC_VSX_CVDPUXWS
 331    UNSPEC_VSX_CVSPDP
 332    UNSPEC_VSX_CVHPSP
 333    UNSPEC_VSX_CVSPDPN
 334    UNSPEC_VSX_CVDPSPN
 335    UNSPEC_VSX_CVSXWDP
 336    UNSPEC_VSX_CVUXWDP
 337    UNSPEC_VSX_CVSXDSP
 338    UNSPEC_VSX_CVUXDSP
 339    UNSPEC_VSX_CVSPSXDS
 340    UNSPEC_VSX_CVSPUXDS
 341    UNSPEC_VSX_CVSXWSP
 342    UNSPEC_VSX_CVUXWSP
 343    UNSPEC_VSX_FLOAT2
 344    UNSPEC_VSX_UNS_FLOAT2
 345    UNSPEC_VSX_FLOATE
 346    UNSPEC_VSX_UNS_FLOATE
 347    UNSPEC_VSX_FLOATO
 348    UNSPEC_VSX_UNS_FLOATO
 349    UNSPEC_VSX_TDIV
 350    UNSPEC_VSX_TSQRT
 351    UNSPEC_VSX_SET
 352    UNSPEC_VSX_ROUND_I
 353    UNSPEC_VSX_ROUND_IC
 354    UNSPEC_VSX_SLDWI
 355    UNSPEC_VSX_XXPERM
 356
 357    UNSPEC_VSX_XXSPLTW
 358    UNSPEC_VSX_XXSPLTD
 359    UNSPEC_VSX_DIVSD
 360    UNSPEC_VSX_DIVUD
 361    UNSPEC_VSX_MULSD
 362    UNSPEC_VSX_XVCVSXDDP
 363    UNSPEC_VSX_XVCVUXDDP
 364    UNSPEC_VSX_XVCVDPSXDS
 365    UNSPEC_VSX_XVCVDPUXDS
 366    UNSPEC_VSX_SIGN_EXTEND
 367    UNSPEC_VSX_XVCVSPSXWS
 368    UNSPEC_VSX_XVCVSPSXDS
 369    UNSPEC_VSX_VSLO
 370    UNSPEC_VSX_EXTRACT
 371    UNSPEC_VSX_SXEXPDP
 372    UNSPEC_VSX_SXSIG
 373    UNSPEC_VSX_SIEXPDP
 374    UNSPEC_VSX_SIEXPQP
 375    UNSPEC_VSX_SCMPEXPDP
 376    UNSPEC_VSX_STSTDC
 377    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
 378    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
 379    UNSPEC_VSX_VXEXP
 380    UNSPEC_VSX_VXSIG
 381    UNSPEC_VSX_VIEXP
 382    UNSPEC_VSX_VTSTDC
 383    UNSPEC_VSX_VEC_INIT
 384    UNSPEC_VSX_VSIGNED2
 385
 386    UNSPEC_LXVL
 387    UNSPEC_LXVLL
 388    UNSPEC_LVSL_REG
 389    UNSPEC_LVSR_REG
 390    UNSPEC_STXVL
 391    UNSPEC_STXVLL
 392    UNSPEC_XL_LEN_R
 393    UNSPEC_XST_LEN_R
 394
 395    UNSPEC_VCLZLSBB
 396    UNSPEC_VCTZLSBB
 397    UNSPEC_VEXTUBLX
 398    UNSPEC_VEXTUHLX
 399    UNSPEC_VEXTUWLX
 400    UNSPEC_VEXTUBRX
 401    UNSPEC_VEXTUHRX
 402    UNSPEC_VEXTUWRX
 403    UNSPEC_VCMPNEB
 404    UNSPEC_VCMPNEZB
 405    UNSPEC_VCMPNEH
 406    UNSPEC_VCMPNEZH
 407    UNSPEC_VCMPNEW
 408    UNSPEC_VCMPNEZW
 409    UNSPEC_XXEXTRACTUW
 410    UNSPEC_XXINSERTW
 411   ])
 412
 413 ;; VSX moves
 414
 415 ;; The patterns for LE permuted loads and stores come before the general
 416 ;; VSX moves so they match first.
 417 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 418   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
 419         (match_operand:VSX_D 1 "memory_operand" "Z"))]
 420   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 421   "#"
 422   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 423   [(set (match_dup 2)
 424         (vec_select:<MODE>
 425           (match_dup 1)
 426           (parallel [(const_int 1) (const_int 0)])))
 427    (set (match_dup 0)
 428         (vec_select:<MODE>
 429           (match_dup 2)
 430           (parallel [(const_int 1) (const_int 0)])))]
 431   "
 432 {
 433   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 434                                        : operands[0];
 435 }
 436   "
 437   [(set_attr "type" "vecload")
 438    (set_attr "length" "8")])
 439
 440 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 441   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 442         (match_operand:VSX_W 1 "memory_operand" "Z"))]
 443   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 444   "#"
 445   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 446   [(set (match_dup 2)
 447         (vec_select:<MODE>
 448           (match_dup 1)
 449           (parallel [(const_int 2) (const_int 3)
 450                      (const_int 0) (const_int 1)])))
 451    (set (match_dup 0)
 452         (vec_select:<MODE>
 453           (match_dup 2)
 454           (parallel [(const_int 2) (const_int 3)
 455                      (const_int 0) (const_int 1)])))]
 456   "
 457 {
 458   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 459                                        : operands[0];
 460 }
 461   "
 462   [(set_attr "type" "vecload")
 463    (set_attr "length" "8")])
 464
 465 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 466   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 467         (match_operand:V8HI 1 "memory_operand" "Z"))]
 468   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 469   "#"
 470   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 471   [(set (match_dup 2)
 472         (vec_select:V8HI
 473           (match_dup 1)
 474           (parallel [(const_int 4) (const_int 5)
 475                      (const_int 6) (const_int 7)
 476                      (const_int 0) (const_int 1)
 477                      (const_int 2) (const_int 3)])))
 478    (set (match_dup 0)
 479         (vec_select:V8HI
 480           (match_dup 2)
 481           (parallel [(const_int 4) (const_int 5)
 482                      (const_int 6) (const_int 7)
 483                      (const_int 0) (const_int 1)
 484                      (const_int 2) (const_int 3)])))]
 485   "
 486 {
 487   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 488                                        : operands[0];
 489 }
 490   "
 491   [(set_attr "type" "vecload")
 492    (set_attr "length" "8")])
 493
 494 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 495   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 496         (match_operand:V16QI 1 "memory_operand" "Z"))]
 497   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 498   "#"
 499   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 500   [(set (match_dup 2)
 501         (vec_select:V16QI
 502           (match_dup 1)
 503           (parallel [(const_int 8) (const_int 9)
 504                      (const_int 10) (const_int 11)
 505                      (const_int 12) (const_int 13)
 506                      (const_int 14) (const_int 15)
 507                      (const_int 0) (const_int 1)
 508                      (const_int 2) (const_int 3)
 509                      (const_int 4) (const_int 5)
 510                      (const_int 6) (const_int 7)])))
 511    (set (match_dup 0)
 512         (vec_select:V16QI
 513           (match_dup 2)
 514           (parallel [(const_int 8) (const_int 9)
 515                      (const_int 10) (const_int 11)
 516                      (const_int 12) (const_int 13)
 517                      (const_int 14) (const_int 15)
 518                      (const_int 0) (const_int 1)
 519                      (const_int 2) (const_int 3)
 520                      (const_int 4) (const_int 5)
 521                      (const_int 6) (const_int 7)])))]
 522   "
 523 {
 524   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 525                                        : operands[0];
 526 }
 527   "
 528   [(set_attr "type" "vecload")
 529    (set_attr "length" "8")])
 530
 531 (define_insn "*vsx_le_perm_store_<mode>"
 532   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
 533         (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
 534   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 535   "#"
 536   [(set_attr "type" "vecstore")
 537    (set_attr "length" "12")])
 538
 539 (define_split
 540   [(set (match_operand:VSX_D 0 "memory_operand" "")
 541         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 542   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 543   [(set (match_dup 2)
 544         (vec_select:<MODE>
 545           (match_dup 1)
 546           (parallel [(const_int 1) (const_int 0)])))
 547    (set (match_dup 0)
 548         (vec_select:<MODE>
 549           (match_dup 2)
 550           (parallel [(const_int 1) (const_int 0)])))]
 551 {
 552   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 553                                        : operands[1];
 554 })
 555
 556 ;; The post-reload split requires that we re-permute the source
 557 ;; register in case it is still live.
 558 (define_split
 559   [(set (match_operand:VSX_D 0 "memory_operand" "")
 560         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 561   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 562   [(set (match_dup 1)
 563         (vec_select:<MODE>
 564           (match_dup 1)
 565           (parallel [(const_int 1) (const_int 0)])))
 566    (set (match_dup 0)
 567         (vec_select:<MODE>
 568           (match_dup 1)
 569           (parallel [(const_int 1) (const_int 0)])))
 570    (set (match_dup 1)
 571         (vec_select:<MODE>
 572           (match_dup 1)
 573           (parallel [(const_int 1) (const_int 0)])))]
 574   "")
 575
 576 (define_insn "*vsx_le_perm_store_<mode>"
 577   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 578         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
 579   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 580   "#"
 581   [(set_attr "type" "vecstore")
 582    (set_attr "length" "12")])
 583
 584 (define_split
 585   [(set (match_operand:VSX_W 0 "memory_operand" "")
 586         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 587   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 588   [(set (match_dup 2)
 589         (vec_select:<MODE>
 590           (match_dup 1)
 591           (parallel [(const_int 2) (const_int 3)
 592                      (const_int 0) (const_int 1)])))
 593    (set (match_dup 0)
 594         (vec_select:<MODE>
 595           (match_dup 2)
 596           (parallel [(const_int 2) (const_int 3)
 597                      (const_int 0) (const_int 1)])))]
 598 {
 599   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 600                                        : operands[1];
 601 })
 602
 603 ;; The post-reload split requires that we re-permute the source
 604 ;; register in case it is still live.
 605 (define_split
 606   [(set (match_operand:VSX_W 0 "memory_operand" "")
 607         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 608   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 609   [(set (match_dup 1)
 610         (vec_select:<MODE>
 611           (match_dup 1)
 612           (parallel [(const_int 2) (const_int 3)
 613                      (const_int 0) (const_int 1)])))
 614    (set (match_dup 0)
 615         (vec_select:<MODE>
 616           (match_dup 1)
 617           (parallel [(const_int 2) (const_int 3)
 618                      (const_int 0) (const_int 1)])))
 619    (set (match_dup 1)
 620         (vec_select:<MODE>
 621           (match_dup 1)
 622           (parallel [(const_int 2) (const_int 3)
 623                      (const_int 0) (const_int 1)])))]
 624   "")
 625
 626 (define_insn "*vsx_le_perm_store_v8hi"
 627   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 628         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 629   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 630   "#"
 631   [(set_attr "type" "vecstore")
 632    (set_attr "length" "12")])
 633
 634 (define_split
 635   [(set (match_operand:V8HI 0 "memory_operand" "")
 636         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 637   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 638   [(set (match_dup 2)
 639         (vec_select:V8HI
 640           (match_dup 1)
 641           (parallel [(const_int 4) (const_int 5)
 642                      (const_int 6) (const_int 7)
 643                      (const_int 0) (const_int 1)
 644                      (const_int 2) (const_int 3)])))
 645    (set (match_dup 0)
 646         (vec_select:V8HI
 647           (match_dup 2)
 648           (parallel [(const_int 4) (const_int 5)
 649                      (const_int 6) (const_int 7)
 650                      (const_int 0) (const_int 1)
 651                      (const_int 2) (const_int 3)])))]
 652 {
 653   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 654                                        : operands[1];
 655 })
 656
 657 ;; The post-reload split requires that we re-permute the source
 658 ;; register in case it is still live.
 659 (define_split
 660   [(set (match_operand:V8HI 0 "memory_operand" "")
 661         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 662   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 663   [(set (match_dup 1)
 664         (vec_select:V8HI
 665           (match_dup 1)
 666           (parallel [(const_int 4) (const_int 5)
 667                      (const_int 6) (const_int 7)
 668                      (const_int 0) (const_int 1)
 669                      (const_int 2) (const_int 3)])))
 670    (set (match_dup 0)
 671         (vec_select:V8HI
 672           (match_dup 1)
 673           (parallel [(const_int 4) (const_int 5)
 674                      (const_int 6) (const_int 7)
 675                      (const_int 0) (const_int 1)
 676                      (const_int 2) (const_int 3)])))
 677    (set (match_dup 1)
 678         (vec_select:V8HI
 679           (match_dup 1)
 680           (parallel [(const_int 4) (const_int 5)
 681                      (const_int 6) (const_int 7)
 682                      (const_int 0) (const_int 1)
 683                      (const_int 2) (const_int 3)])))]
 684   "")
 685
 686 (define_insn "*vsx_le_perm_store_v16qi"
 687   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
 688         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 689   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 690   "#"
 691   [(set_attr "type" "vecstore")
 692    (set_attr "length" "12")])
 693
 694 (define_split
 695   [(set (match_operand:V16QI 0 "memory_operand" "")
 696         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 697   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 698   [(set (match_dup 2)
 699         (vec_select:V16QI
 700           (match_dup 1)
 701           (parallel [(const_int 8) (const_int 9)
 702                      (const_int 10) (const_int 11)
 703                      (const_int 12) (const_int 13)
 704                      (const_int 14) (const_int 15)
 705                      (const_int 0) (const_int 1)
 706                      (const_int 2) (const_int 3)
 707                      (const_int 4) (const_int 5)
 708                      (const_int 6) (const_int 7)])))
 709    (set (match_dup 0)
 710         (vec_select:V16QI
 711           (match_dup 2)
 712           (parallel [(const_int 8) (const_int 9)
 713                      (const_int 10) (const_int 11)
 714                      (const_int 12) (const_int 13)
 715                      (const_int 14) (const_int 15)
 716                      (const_int 0) (const_int 1)
 717                      (const_int 2) (const_int 3)
 718                      (const_int 4) (const_int 5)
 719                      (const_int 6) (const_int 7)])))]
 720 {
 721   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 722                                        : operands[1];
 723 })
 724
 725 ;; The post-reload split requires that we re-permute the source
 726 ;; register in case it is still live.
 727 (define_split
 728   [(set (match_operand:V16QI 0 "memory_operand" "")
 729         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 730   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 731   [(set (match_dup 1)
 732         (vec_select:V16QI
 733           (match_dup 1)
 734           (parallel [(const_int 8) (const_int 9)
 735                      (const_int 10) (const_int 11)
 736                      (const_int 12) (const_int 13)
 737                      (const_int 14) (const_int 15)
 738                      (const_int 0) (const_int 1)
 739                      (const_int 2) (const_int 3)
 740                      (const_int 4) (const_int 5)
 741                      (const_int 6) (const_int 7)])))
 742    (set (match_dup 0)
 743         (vec_select:V16QI
 744           (match_dup 1)
 745           (parallel [(const_int 8) (const_int 9)
 746                      (const_int 10) (const_int 11)
 747                      (const_int 12) (const_int 13)
 748                      (const_int 14) (const_int 15)
 749                      (const_int 0) (const_int 1)
 750                      (const_int 2) (const_int 3)
 751                      (const_int 4) (const_int 5)
 752                      (const_int 6) (const_int 7)])))
 753    (set (match_dup 1)
 754         (vec_select:V16QI
 755           (match_dup 1)
 756           (parallel [(const_int 8) (const_int 9)
 757                      (const_int 10) (const_int 11)
 758                      (const_int 12) (const_int 13)
 759                      (const_int 14) (const_int 15)
 760                      (const_int 0) (const_int 1)
 761                      (const_int 2) (const_int 3)
 762                      (const_int 4) (const_int 5)
 763                      (const_int 6) (const_int 7)])))]
 764   "")
 765
 766 ;; Little endian word swapping for 128-bit types that are either scalars or the
 767 ;; special V1TI container class, which it is not appropriate to use vec_select
 768 ;; for the type.
 769 (define_insn "*vsx_le_permute_<mode>"
 770   [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
 771         (rotate:VSX_TI
 772          (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
 773          (const_int 64)))]
 774   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 775   "@
 776    xxpermdi %x0,%x1,%x1,2
 777    lxvd2x %x0,%y1
 778    stxvd2x %x1,%y0
 779    mr %0,%L1\;mr %L0,%1
 780    ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
 781    std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
 782   [(set_attr "length" "4,4,4,8,8,8")
 783    (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
 784
 785 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
 786   [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
 787         (rotate:VSX_TI
 788          (rotate:VSX_TI
 789           (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
 790           (const_int 64))
 791          (const_int 64)))]
 792   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 793   "@
 794    #
 795    xxlor %x0,%x1"
 796   ""
 797   [(set (match_dup 0) (match_dup 1))]
 798 {
 799   if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
 800     {
 801       emit_note (NOTE_INSN_DELETED);
 802       DONE;
 803     }
 804 }
 805   [(set_attr "length" "0,4")
 806    (set_attr "type" "veclogical")])
 807
 808 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 809   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
 810         (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
 811   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 812   "@
 813    #
 814    #"
 815   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 816   [(const_int 0)]
 817   "
 818 {
 819   rtx tmp = (can_create_pseudo_p ()
 820              ? gen_reg_rtx_and_attrs (operands[0])
 821              : operands[0]);
 822   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
 823   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
 824   DONE;
 825 }
 826   "
 827   [(set_attr "type" "vecload,load")
 828    (set_attr "length" "8,8")])
 829
 830 (define_insn "*vsx_le_perm_store_<mode>"
 831   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
 832         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
 833   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 834   "@
 835    #
 836    #"
 837   [(set_attr "type" "vecstore,store")
 838    (set_attr "length" "12,8")])
 839
 840 (define_split
 841   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 842         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 843   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
 844   [(const_int 0)]
 845 {
 846   rtx tmp = (can_create_pseudo_p ()
 847              ? gen_reg_rtx_and_attrs (operands[0])
 848              : operands[0]);
 849   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
 850   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
 851   DONE;
 852 })
 853
 854 ;; Peepholes to catch loads and stores for TImode if TImode landed in
 855 ;; GPR registers on a little endian system.
 856 (define_peephole2
 857   [(set (match_operand:VSX_TI 0 "int_reg_operand")
 858         (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
 859                        (const_int 64)))
 860    (set (match_operand:VSX_TI 2 "int_reg_operand")
 861         (rotate:VSX_TI (match_dup 0)
 862                        (const_int 64)))]
 863   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
 864    && (rtx_equal_p (operands[0], operands[2])
 865        || peep2_reg_dead_p (2, operands[0]))"
 866    [(set (match_dup 2) (match_dup 1))])
 867
 868 (define_peephole2
 869   [(set (match_operand:VSX_TI 0 "int_reg_operand")
 870         (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
 871                        (const_int 64)))
 872    (set (match_operand:VSX_TI 2 "memory_operand")
 873         (rotate:VSX_TI (match_dup 0)
 874                        (const_int 64)))]
 875   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
 876    && peep2_reg_dead_p (2, operands[0])"
 877    [(set (match_dup 2) (match_dup 1))])
 878
 879 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
 880 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
 881 ;; floating point are handled by the more generic swap elimination pass.
 882 (define_peephole2
 883   [(set (match_operand:TI 0 "vsx_register_operand" "")
 884         (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
 885                    (const_int 64)))
 886    (set (match_operand:TI 2 "vsx_register_operand" "")
 887         (rotate:TI (match_dup 0)
 888                    (const_int 64)))]
 889   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
 890    && (rtx_equal_p (operands[0], operands[2])
 891        || peep2_reg_dead_p (2, operands[0]))"
 892    [(set (match_dup 2) (match_dup 1))])
 893
 894 ;; The post-reload split requires that we re-permute the source
 895 ;; register in case it is still live.
 896 (define_split
 897   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 898         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 899   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
 900   [(const_int 0)]
 901 {
 902   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
 903   rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
 904   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
 905   DONE;
 906 })
 907
 908 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 909 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
 910 (define_insn "xxspltib_v16qi"
 911   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 912         (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
 913   "TARGET_P9_VECTOR"
 914 {
 915   operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
 916   return "xxspltib %x0,%2";
 917 }
 918   [(set_attr "type" "vecperm")])
 919
 920 (define_insn "xxspltib_<mode>_nosplit"
 921   [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
 922         (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
 923   "TARGET_P9_VECTOR"
 924 {
 925   rtx op1 = operands[1];
 926   int value = 256;
 927   int num_insns = -1;
 928
 929   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 930       || num_insns != 1)
 931     gcc_unreachable ();
 932
 933   operands[2] = GEN_INT (value & 0xff);
 934   return "xxspltib %x0,%2";
 935 }
 936   [(set_attr "type" "vecperm")])
 937
 938 (define_insn_and_split "*xxspltib_<mode>_split"
 939   [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
 940         (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
 941   "TARGET_P9_VECTOR"
 942   "#"
 943   "&& 1"
 944   [(const_int 0)]
 945 {
 946   int value = 256;
 947   int num_insns = -1;
 948   rtx op0 = operands[0];
 949   rtx op1 = operands[1];
 950   rtx tmp = ((can_create_pseudo_p ())
 951              ? gen_reg_rtx (V16QImode)
 952              : gen_lowpart (V16QImode, op0));
 953
 954   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 955       || num_insns != 2)
 956     gcc_unreachable ();
 957
 958   emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
 959
 960   if (<MODE>mode == V2DImode)
 961     emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
 962
 963   else if (<MODE>mode == V4SImode)
 964     emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
 965
 966   else if (<MODE>mode == V8HImode)
 967     emit_insn (gen_altivec_vupkhsb  (op0, tmp));
 968
 969   else
 970     gcc_unreachable ();
 971
 972   DONE;
 973 }
 974   [(set_attr "type" "vecperm")
 975    (set_attr "length" "8")])
 976
 977
 978 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
 979 ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
 980 ;; all 1's, since the machine does not have to wait for the previous
 981 ;; instruction using the register being set (such as a store waiting on a slow
 982 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
 983
 984 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 985 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
 986 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
 987 (define_insn "*vsx_mov<mode>_64bit"
 988   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
 989                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
 990                 ?&r,       ??r,       ??Y,       ??r,       wo,        v,
 991                 ?<VSa>,    *r,        v,         ??r,       wZ,        v")
 992
 993         (match_operand:VSX_M 1 "input_operand"
 994                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
 995                 wQ,        Y,         r,         r,         wE,        jwM,
 996                 ?jwM,      jwM,       W,         W,         v,         wZ"))]
 997
 998   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
 999    && (register_operand (operands[0], <MODE>mode)
1000        || register_operand (operands[1], <MODE>mode))"
1001 {
1002   return rs6000_output_move_128bit (operands);
1003 }
1004   [(set_attr "type"
1005                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
1006                 store,     load,      store,     *,         vecsimple, vecsimple,
1007                 vecsimple, *,         *,         *,         vecstore,  vecload")
1008
1009    (set_attr "length"
1010                "4,         4,         4,         8,         4,         8,
1011                 8,         8,         8,         8,         4,         4,
1012                 4,         8,         20,        20,        4,         4")])
1013
1014 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
1015 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
1016 ;;              LVX (VMX)  STVX (VMX)
1017 (define_insn "*vsx_mov<mode>_32bit"
1018   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1019                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       ??r,
1020                 wo,        v,         ?<VSa>,    *r,        v,         ??r,
1021                 wZ,        v")
1022
1023         (match_operand:VSX_M 1 "input_operand"
1024                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
1025                 wE,        jwM,       ?jwM,      jwM,       W,         W,
1026                 v,         wZ"))]
1027
1028   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1029    && (register_operand (operands[0], <MODE>mode)
1030        || register_operand (operands[1], <MODE>mode))"
1031 {
1032   return rs6000_output_move_128bit (operands);
1033 }
1034   [(set_attr "type"
1035                "vecstore,  vecload,   vecsimple, load,      store,    *,
1036                 vecsimple, vecsimple, vecsimple, *,         *,        *,
1037                 vecstore,  vecload")
1038
1039    (set_attr "length"
1040                "4,         4,         4,         16,        16,        16,
1041                 4,         4,         4,         16,        20,        32,
1042                 4,         4")])
1043
1044 ;; Explicit  load/store expanders for the builtin functions
1045 (define_expand "vsx_load_<mode>"
1046   [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
1047         (match_operand:VSX_M 1 "memory_operand" ""))]
1048   "VECTOR_MEM_VSX_P (<MODE>mode)"
1049 {
1050   /* Expand to swaps if needed, prior to swap optimization.  */
1051   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1052     {
1053       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1054       DONE;
1055     }
1056 })
1057
1058 (define_expand "vsx_store_<mode>"
1059   [(set (match_operand:VSX_M 0 "memory_operand" "")
1060         (match_operand:VSX_M 1 "vsx_register_operand" ""))]
1061   "VECTOR_MEM_VSX_P (<MODE>mode)"
1062 {
1063   /* Expand to swaps if needed, prior to swap optimization.  */
1064   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1065     {
1066       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1067       DONE;
1068     }
1069 })
1070
1071 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1072 ;; when you really want their element-reversing behavior.
1073 (define_insn "vsx_ld_elemrev_v2di"
1074   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1075         (vec_select:V2DI
1076           (match_operand:V2DI 1 "memory_operand" "Z")
1077           (parallel [(const_int 1) (const_int 0)])))]
1078   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1079   "lxvd2x %x0,%y1"
1080   [(set_attr "type" "vecload")])
1081
1082 (define_insn "vsx_ld_elemrev_v2df"
1083   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1084         (vec_select:V2DF
1085           (match_operand:V2DF 1 "memory_operand" "Z")
1086           (parallel [(const_int 1) (const_int 0)])))]
1087   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1088   "lxvd2x %x0,%y1"
1089   [(set_attr "type" "vecload")])
1090
1091 (define_insn "vsx_ld_elemrev_v4si"
1092   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1093         (vec_select:V4SI
1094           (match_operand:V4SI 1 "memory_operand" "Z")
1095           (parallel [(const_int 3) (const_int 2)
1096                      (const_int 1) (const_int 0)])))]
1097   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1098   "lxvw4x %x0,%y1"
1099   [(set_attr "type" "vecload")])
1100
1101 (define_insn "vsx_ld_elemrev_v4sf"
1102   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1103         (vec_select:V4SF
1104           (match_operand:V4SF 1 "memory_operand" "Z")
1105           (parallel [(const_int 3) (const_int 2)
1106                      (const_int 1) (const_int 0)])))]
1107   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1108   "lxvw4x %x0,%y1"
1109   [(set_attr "type" "vecload")])
1110
1111 (define_insn "vsx_ld_elemrev_v8hi"
1112   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1113         (vec_select:V8HI
1114           (match_operand:V8HI 1 "memory_operand" "Z")
1115           (parallel [(const_int 7) (const_int 6)
1116                      (const_int 5) (const_int 4)
1117                      (const_int 3) (const_int 2)
1118                      (const_int 1) (const_int 0)])))]
1119   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1120   "lxvh8x %x0,%y1"
1121   [(set_attr "type" "vecload")])
1122
1123 (define_insn "vsx_ld_elemrev_v16qi"
1124   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1125         (vec_select:V16QI
1126           (match_operand:V16QI 1 "memory_operand" "Z")
1127           (parallel [(const_int 15) (const_int 14)
1128                      (const_int 13) (const_int 12)
1129                      (const_int 11) (const_int 10)
1130                      (const_int  9) (const_int  8)
1131                      (const_int  7) (const_int  6)
1132                      (const_int  5) (const_int  4)
1133                      (const_int  3) (const_int  2)
1134                      (const_int  1) (const_int  0)])))]
1135   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1136   "lxvb16x %x0,%y1"
1137   [(set_attr "type" "vecload")])
1138
1139 (define_insn "vsx_st_elemrev_v2df"
1140   [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1141         (vec_select:V2DF
1142           (match_operand:V2DF 1 "vsx_register_operand" "wa")
1143           (parallel [(const_int 1) (const_int 0)])))]
1144   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1145   "stxvd2x %x1,%y0"
1146   [(set_attr "type" "vecstore")])
1147
1148 (define_insn "vsx_st_elemrev_v2di"
1149   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1150         (vec_select:V2DI
1151           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1152           (parallel [(const_int 1) (const_int 0)])))]
1153   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1154   "stxvd2x %x1,%y0"
1155   [(set_attr "type" "vecstore")])
1156
1157 (define_insn "vsx_st_elemrev_v4sf"
1158   [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1159         (vec_select:V4SF
1160           (match_operand:V4SF 1 "vsx_register_operand" "wa")
1161           (parallel [(const_int 3) (const_int 2)
1162                      (const_int 1) (const_int 0)])))]
1163   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1164   "stxvw4x %x1,%y0"
1165   [(set_attr "type" "vecstore")])
1166
1167 (define_insn "vsx_st_elemrev_v4si"
1168   [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1169         (vec_select:V4SI
1170           (match_operand:V4SI 1 "vsx_register_operand" "wa")
1171           (parallel [(const_int 3) (const_int 2)
1172                      (const_int 1) (const_int 0)])))]
1173   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1174   "stxvw4x %x1,%y0"
1175   [(set_attr "type" "vecstore")])
1176
1177 (define_insn "vsx_st_elemrev_v8hi"
1178   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1179         (vec_select:V8HI
1180           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1181           (parallel [(const_int 7) (const_int 6)
1182                      (const_int 5) (const_int 4)
1183                      (const_int 3) (const_int 2)
1184                      (const_int 1) (const_int 0)])))]
1185   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1186   "stxvh8x %x1,%y0"
1187   [(set_attr "type" "vecstore")])
1188
1189 (define_insn "vsx_st_elemrev_v16qi"
1190   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1191         (vec_select:V16QI
1192           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1193           (parallel [(const_int 15) (const_int 14)
1194                      (const_int 13) (const_int 12)
1195                      (const_int 11) (const_int 10)
1196                      (const_int  9) (const_int  8)
1197                      (const_int  7) (const_int  6)
1198                      (const_int  5) (const_int  4)
1199                      (const_int  3) (const_int  2)
1200                      (const_int  1) (const_int  0)])))]
1201   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1202   "stxvb16x %x1,%y0"
1203   [(set_attr "type" "vecstore")])
1204
1205 \f
1206 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1207 ;; instructions are now combined with the insn for the traditional floating
1208 ;; point unit.
1209 (define_insn "*vsx_add<mode>3"
1210   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1211         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1212                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1213   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1214   "xvadd<VSs> %x0,%x1,%x2"
1215   [(set_attr "type" "<VStype_simple>")
1216    (set_attr "fp_type" "<VSfptype_simple>")])
1217
1218 (define_insn "*vsx_sub<mode>3"
1219   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1220         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1221                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1222   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1223   "xvsub<VSs> %x0,%x1,%x2"
1224   [(set_attr "type" "<VStype_simple>")
1225    (set_attr "fp_type" "<VSfptype_simple>")])
1226
1227 (define_insn "*vsx_mul<mode>3"
1228   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1229         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1230                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1231   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1232   "xvmul<VSs> %x0,%x1,%x2"
1233   [(set_attr "type" "<VStype_simple>")
1234    (set_attr "fp_type" "<VSfptype_mul>")])
1235
1236 ; Emulate vector with scalar for vec_mul in V2DImode
1237 (define_insn_and_split "vsx_mul_v2di"
1238   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1239         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1240                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1241                      UNSPEC_VSX_MULSD))]
1242   "VECTOR_MEM_VSX_P (V2DImode)"
1243   "#"
1244   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1245   [(const_int 0)]
1246   "
1247 {
1248   rtx op0 = operands[0];
1249   rtx op1 = operands[1];
1250   rtx op2 = operands[2];
1251   rtx op3 = gen_reg_rtx (DImode);
1252   rtx op4 = gen_reg_rtx (DImode);
1253   rtx op5 = gen_reg_rtx (DImode);
1254   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1255   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1256   emit_insn (gen_muldi3 (op5, op3, op4));
1257   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1258   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1259   emit_insn (gen_muldi3 (op3, op3, op4));
1260   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1261   DONE;
1262 }"
1263   [(set_attr "type" "mul")])
1264
1265 (define_insn "*vsx_div<mode>3"
1266   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1267         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1268                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1269   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1270   "xvdiv<VSs> %x0,%x1,%x2"
1271   [(set_attr "type" "<VStype_div>")
1272    (set_attr "fp_type" "<VSfptype_div>")])
1273
1274 ; Emulate vector with scalar for vec_div in V2DImode
1275 (define_insn_and_split "vsx_div_v2di"
1276   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1277         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1278                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1279                      UNSPEC_VSX_DIVSD))]
1280   "VECTOR_MEM_VSX_P (V2DImode)"
1281   "#"
1282   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1283   [(const_int 0)]
1284   "
1285 {
1286   rtx op0 = operands[0];
1287   rtx op1 = operands[1];
1288   rtx op2 = operands[2];
1289   rtx op3 = gen_reg_rtx (DImode);
1290   rtx op4 = gen_reg_rtx (DImode);
1291   rtx op5 = gen_reg_rtx (DImode);
1292   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1293   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1294   emit_insn (gen_divdi3 (op5, op3, op4));
1295   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1296   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1297   emit_insn (gen_divdi3 (op3, op3, op4));
1298   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1299   DONE;
1300 }"
1301   [(set_attr "type" "div")])
1302
1303 (define_insn_and_split "vsx_udiv_v2di"
1304   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1305         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1306                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1307                      UNSPEC_VSX_DIVUD))]
1308   "VECTOR_MEM_VSX_P (V2DImode)"
1309   "#"
1310   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1311   [(const_int 0)]
1312   "
1313 {
1314   rtx op0 = operands[0];
1315   rtx op1 = operands[1];
1316   rtx op2 = operands[2];
1317   rtx op3 = gen_reg_rtx (DImode);
1318   rtx op4 = gen_reg_rtx (DImode);
1319   rtx op5 = gen_reg_rtx (DImode);
1320   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1321   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1322   emit_insn (gen_udivdi3 (op5, op3, op4));
1323   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1324   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1325   emit_insn (gen_udivdi3 (op3, op3, op4));
1326   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1327   DONE;
1328 }"
1329   [(set_attr "type" "div")])
1330
1331 ;; *tdiv* instruction returning the FG flag
1332 (define_expand "vsx_tdiv<mode>3_fg"
1333   [(set (match_dup 3)
1334         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1335                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1336                      UNSPEC_VSX_TDIV))
1337    (set (match_operand:SI 0 "gpc_reg_operand" "")
1338         (gt:SI (match_dup 3)
1339                (const_int 0)))]
1340   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1341 {
1342   operands[3] = gen_reg_rtx (CCFPmode);
1343 })
1344
1345 ;; *tdiv* instruction returning the FE flag
1346 (define_expand "vsx_tdiv<mode>3_fe"
1347   [(set (match_dup 3)
1348         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1349                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1350                      UNSPEC_VSX_TDIV))
1351    (set (match_operand:SI 0 "gpc_reg_operand" "")
1352         (eq:SI (match_dup 3)
1353                (const_int 0)))]
1354   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1355 {
1356   operands[3] = gen_reg_rtx (CCFPmode);
1357 })
1358
1359 (define_insn "*vsx_tdiv<mode>3_internal"
1360   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1361         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1362                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1363                    UNSPEC_VSX_TDIV))]
1364   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1365   "x<VSv>tdiv<VSs> %0,%x1,%x2"
1366   [(set_attr "type" "<VStype_simple>")
1367    (set_attr "fp_type" "<VSfptype_simple>")])
1368
1369 (define_insn "vsx_fre<mode>2"
1370   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1371         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1372                       UNSPEC_FRES))]
1373   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1374   "xvre<VSs> %x0,%x1"
1375   [(set_attr "type" "<VStype_simple>")
1376    (set_attr "fp_type" "<VSfptype_simple>")])
1377
1378 (define_insn "*vsx_neg<mode>2"
1379   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1380         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1381   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1382   "xvneg<VSs> %x0,%x1"
1383   [(set_attr "type" "<VStype_simple>")
1384    (set_attr "fp_type" "<VSfptype_simple>")])
1385
1386 (define_insn "*vsx_abs<mode>2"
1387   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1388         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1389   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1390   "xvabs<VSs> %x0,%x1"
1391   [(set_attr "type" "<VStype_simple>")
1392    (set_attr "fp_type" "<VSfptype_simple>")])
1393
1394 (define_insn "vsx_nabs<mode>2"
1395   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1396         (neg:VSX_F
1397          (abs:VSX_F
1398           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1399   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1400   "xvnabs<VSs> %x0,%x1"
1401   [(set_attr "type" "<VStype_simple>")
1402    (set_attr "fp_type" "<VSfptype_simple>")])
1403
1404 (define_insn "vsx_smax<mode>3"
1405   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1406         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1407                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1408   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1409   "xvmax<VSs> %x0,%x1,%x2"
1410   [(set_attr "type" "<VStype_simple>")
1411    (set_attr "fp_type" "<VSfptype_simple>")])
1412
1413 (define_insn "*vsx_smin<mode>3"
1414   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1415         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1416                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1417   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1418   "xvmin<VSs> %x0,%x1,%x2"
1419   [(set_attr "type" "<VStype_simple>")
1420    (set_attr "fp_type" "<VSfptype_simple>")])
1421
1422 (define_insn "*vsx_sqrt<mode>2"
1423   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1424         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1425   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1426   "xvsqrt<VSs> %x0,%x1"
1427   [(set_attr "type" "<VStype_sqrt>")
1428    (set_attr "fp_type" "<VSfptype_sqrt>")])
1429
1430 (define_insn "*vsx_rsqrte<mode>2"
1431   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1432         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1433                       UNSPEC_RSQRT))]
1434   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1435   "xvrsqrte<VSs> %x0,%x1"
1436   [(set_attr "type" "<VStype_simple>")
1437    (set_attr "fp_type" "<VSfptype_simple>")])
1438
1439 ;; *tsqrt* returning the fg flag
1440 (define_expand "vsx_tsqrt<mode>2_fg"
1441   [(set (match_dup 2)
1442         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1443                      UNSPEC_VSX_TSQRT))
1444    (set (match_operand:SI 0 "gpc_reg_operand" "")
1445         (gt:SI (match_dup 2)
1446                (const_int 0)))]
1447   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1448 {
1449   operands[2] = gen_reg_rtx (CCFPmode);
1450 })
1451
1452 ;; *tsqrt* returning the fe flag
1453 (define_expand "vsx_tsqrt<mode>2_fe"
1454   [(set (match_dup 2)
1455         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1456                      UNSPEC_VSX_TSQRT))
1457    (set (match_operand:SI 0 "gpc_reg_operand" "")
1458         (eq:SI (match_dup 2)
1459                (const_int 0)))]
1460   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1461 {
1462   operands[2] = gen_reg_rtx (CCFPmode);
1463 })
1464
1465 (define_insn "*vsx_tsqrt<mode>2_internal"
1466   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1467         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1468                      UNSPEC_VSX_TSQRT))]
1469   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1470   "x<VSv>tsqrt<VSs> %0,%x1"
1471   [(set_attr "type" "<VStype_simple>")
1472    (set_attr "fp_type" "<VSfptype_simple>")])
1473
1474 ;; Fused vector multiply/add instructions. Support the classical Altivec
1475 ;; versions of fma, which allows the target to be a separate register from the
1476 ;; 3 inputs.  Under VSX, the target must be either the addend or the first
1477 ;; multiply.
1478
1479 (define_insn "*vsx_fmav4sf4"
1480   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1481         (fma:V4SF
1482           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1483           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1484           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1485   "VECTOR_UNIT_VSX_P (V4SFmode)"
1486   "@
1487    xvmaddasp %x0,%x1,%x2
1488    xvmaddmsp %x0,%x1,%x3
1489    xvmaddasp %x0,%x1,%x2
1490    xvmaddmsp %x0,%x1,%x3
1491    vmaddfp %0,%1,%2,%3"
1492   [(set_attr "type" "vecfloat")])
1493
1494 (define_insn "*vsx_fmav2df4"
1495   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1496         (fma:V2DF
1497           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1498           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1499           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1500   "VECTOR_UNIT_VSX_P (V2DFmode)"
1501   "@
1502    xvmaddadp %x0,%x1,%x2
1503    xvmaddmdp %x0,%x1,%x3
1504    xvmaddadp %x0,%x1,%x2
1505    xvmaddmdp %x0,%x1,%x3"
1506   [(set_attr "type" "vecdouble")])
1507
1508 (define_insn "*vsx_fms<mode>4"
1509   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1510         (fma:VSX_F
1511           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1512           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1513           (neg:VSX_F
1514             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1515   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1516   "@
1517    xvmsuba<VSs> %x0,%x1,%x2
1518    xvmsubm<VSs> %x0,%x1,%x3
1519    xvmsuba<VSs> %x0,%x1,%x2
1520    xvmsubm<VSs> %x0,%x1,%x3"
1521   [(set_attr "type" "<VStype_mul>")])
1522
1523 (define_insn "*vsx_nfma<mode>4"
1524   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1525         (neg:VSX_F
1526          (fma:VSX_F
1527           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1528           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1529           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1530   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1531   "@
1532    xvnmadda<VSs> %x0,%x1,%x2
1533    xvnmaddm<VSs> %x0,%x1,%x3
1534    xvnmadda<VSs> %x0,%x1,%x2
1535    xvnmaddm<VSs> %x0,%x1,%x3"
1536   [(set_attr "type" "<VStype_mul>")
1537    (set_attr "fp_type" "<VSfptype_mul>")])
1538
1539 (define_insn "*vsx_nfmsv4sf4"
1540   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1541         (neg:V4SF
1542          (fma:V4SF
1543            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1544            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1545            (neg:V4SF
1546              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1547   "VECTOR_UNIT_VSX_P (V4SFmode)"
1548   "@
1549    xvnmsubasp %x0,%x1,%x2
1550    xvnmsubmsp %x0,%x1,%x3
1551    xvnmsubasp %x0,%x1,%x2
1552    xvnmsubmsp %x0,%x1,%x3
1553    vnmsubfp %0,%1,%2,%3"
1554   [(set_attr "type" "vecfloat")])
1555
1556 (define_insn "*vsx_nfmsv2df4"
1557   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1558         (neg:V2DF
1559          (fma:V2DF
1560            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1561            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1562            (neg:V2DF
1563              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1564   "VECTOR_UNIT_VSX_P (V2DFmode)"
1565   "@
1566    xvnmsubadp %x0,%x1,%x2
1567    xvnmsubmdp %x0,%x1,%x3
1568    xvnmsubadp %x0,%x1,%x2
1569    xvnmsubmdp %x0,%x1,%x3"
1570   [(set_attr "type" "vecdouble")])
1571
1572 ;; Vector conditional expressions (no scalar version for these instructions)
1573 (define_insn "vsx_eq<mode>"
1574   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1575         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1576                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1577   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1578   "xvcmpeq<VSs> %x0,%x1,%x2"
1579   [(set_attr "type" "<VStype_simple>")
1580    (set_attr "fp_type" "<VSfptype_simple>")])
1581
1582 (define_insn "vsx_gt<mode>"
1583   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1584         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1585                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1586   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1587   "xvcmpgt<VSs> %x0,%x1,%x2"
1588   [(set_attr "type" "<VStype_simple>")
1589    (set_attr "fp_type" "<VSfptype_simple>")])
1590
1591 (define_insn "*vsx_ge<mode>"
1592   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1593         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1594                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1595   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1596   "xvcmpge<VSs> %x0,%x1,%x2"
1597   [(set_attr "type" "<VStype_simple>")
1598    (set_attr "fp_type" "<VSfptype_simple>")])
1599
1600 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1601 ;; indicate a combined status
1602 (define_insn "*vsx_eq_<mode>_p"
1603   [(set (reg:CC CR6_REGNO)
1604         (unspec:CC
1605          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1606                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1607          UNSPEC_PREDICATE))
1608    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1609         (eq:VSX_F (match_dup 1)
1610                   (match_dup 2)))]
1611   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1612   "xvcmpeq<VSs>. %x0,%x1,%x2"
1613   [(set_attr "type" "<VStype_simple>")])
1614
1615 (define_insn "*vsx_gt_<mode>_p"
1616   [(set (reg:CC CR6_REGNO)
1617         (unspec:CC
1618          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1619                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1620          UNSPEC_PREDICATE))
1621    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1622         (gt:VSX_F (match_dup 1)
1623                   (match_dup 2)))]
1624   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1625   "xvcmpgt<VSs>. %x0,%x1,%x2"
1626   [(set_attr "type" "<VStype_simple>")])
1627
1628 (define_insn "*vsx_ge_<mode>_p"
1629   [(set (reg:CC CR6_REGNO)
1630         (unspec:CC
1631          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1632                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1633          UNSPEC_PREDICATE))
1634    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1635         (ge:VSX_F (match_dup 1)
1636                   (match_dup 2)))]
1637   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1638   "xvcmpge<VSs>. %x0,%x1,%x2"
1639   [(set_attr "type" "<VStype_simple>")])
1640
1641 ;; Vector select
1642 (define_insn "*vsx_xxsel<mode>"
1643   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1644         (if_then_else:VSX_L
1645          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1646                 (match_operand:VSX_L 4 "zero_constant" ""))
1647          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1648          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1649   "VECTOR_MEM_VSX_P (<MODE>mode)"
1650   "xxsel %x0,%x3,%x2,%x1"
1651   [(set_attr "type" "vecmove")])
1652
1653 (define_insn "*vsx_xxsel<mode>_uns"
1654   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1655         (if_then_else:VSX_L
1656          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1657                    (match_operand:VSX_L 4 "zero_constant" ""))
1658          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1659          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1660   "VECTOR_MEM_VSX_P (<MODE>mode)"
1661   "xxsel %x0,%x3,%x2,%x1"
1662   [(set_attr "type" "vecmove")])
1663
1664 ;; Copy sign
1665 (define_insn "vsx_copysign<mode>3"
1666   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1667         (unspec:VSX_F
1668          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1669           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1670          UNSPEC_COPYSIGN))]
1671   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1672   "xvcpsgn<VSs> %x0,%x2,%x1"
1673   [(set_attr "type" "<VStype_simple>")
1674    (set_attr "fp_type" "<VSfptype_simple>")])
1675
1676 ;; For the conversions, limit the register class for the integer value to be
1677 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1678 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1679 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1680 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1681 ;; in allowing virtual registers.
1682 (define_insn "vsx_float<VSi><mode>2"
1683   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1684         (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1685   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1686   "xvcvsx<VSc><VSs> %x0,%x1"
1687   [(set_attr "type" "<VStype_simple>")
1688    (set_attr "fp_type" "<VSfptype_simple>")])
1689
1690 (define_insn "vsx_floatuns<VSi><mode>2"
1691   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1692         (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1693   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1694   "xvcvux<VSc><VSs> %x0,%x1"
1695   [(set_attr "type" "<VStype_simple>")
1696    (set_attr "fp_type" "<VSfptype_simple>")])
1697
1698 (define_insn "vsx_fix_trunc<mode><VSi>2"
1699   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1700         (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1701   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1702   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1703   [(set_attr "type" "<VStype_simple>")
1704    (set_attr "fp_type" "<VSfptype_simple>")])
1705
1706 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1707   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1708         (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1709   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1710   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1711   [(set_attr "type" "<VStype_simple>")
1712    (set_attr "fp_type" "<VSfptype_simple>")])
1713
1714 ;; Math rounding functions
1715 (define_insn "vsx_x<VSv>r<VSs>i"
1716   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1717         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1718                       UNSPEC_VSX_ROUND_I))]
1719   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1720   "x<VSv>r<VSs>i %x0,%x1"
1721   [(set_attr "type" "<VStype_simple>")
1722    (set_attr "fp_type" "<VSfptype_simple>")])
1723
1724 (define_insn "vsx_x<VSv>r<VSs>ic"
1725   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1726         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1727                       UNSPEC_VSX_ROUND_IC))]
1728   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1729   "x<VSv>r<VSs>ic %x0,%x1"
1730   [(set_attr "type" "<VStype_simple>")
1731    (set_attr "fp_type" "<VSfptype_simple>")])
1732
1733 (define_insn "vsx_btrunc<mode>2"
1734   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1735         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1736   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1737   "xvr<VSs>iz %x0,%x1"
1738   [(set_attr "type" "<VStype_simple>")
1739    (set_attr "fp_type" "<VSfptype_simple>")])
1740
1741 (define_insn "*vsx_b2trunc<mode>2"
1742   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1743         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1744                       UNSPEC_FRIZ))]
1745   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1746   "x<VSv>r<VSs>iz %x0,%x1"
1747   [(set_attr "type" "<VStype_simple>")
1748    (set_attr "fp_type" "<VSfptype_simple>")])
1749
1750 (define_insn "vsx_floor<mode>2"
1751   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1752         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1753                       UNSPEC_FRIM))]
1754   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1755   "xvr<VSs>im %x0,%x1"
1756   [(set_attr "type" "<VStype_simple>")
1757    (set_attr "fp_type" "<VSfptype_simple>")])
1758
1759 (define_insn "vsx_ceil<mode>2"
1760   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1761         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1762                       UNSPEC_FRIP))]
1763   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1764   "xvr<VSs>ip %x0,%x1"
1765   [(set_attr "type" "<VStype_simple>")
1766    (set_attr "fp_type" "<VSfptype_simple>")])
1767
1768 \f
1769 ;; VSX convert to/from double vector
1770
1771 ;; Convert between single and double precision
1772 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1773 ;; scalar single precision instructions internally use the double format.
1774 ;; Prefer the altivec registers, since we likely will need to do a vperm
1775 (define_insn "vsx_<VS_spdp_insn>"
1776   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1777         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1778                               UNSPEC_VSX_CVSPDP))]
1779   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1780   "<VS_spdp_insn> %x0,%x1"
1781   [(set_attr "type" "<VS_spdp_type>")])
1782
1783 ;; xscvspdp, represent the scalar SF type as V4SF
1784 (define_insn "vsx_xscvspdp"
1785   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1786         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1787                    UNSPEC_VSX_CVSPDP))]
1788   "VECTOR_UNIT_VSX_P (V4SFmode)"
1789   "xscvspdp %x0,%x1"
1790   [(set_attr "type" "fp")])
1791
1792 ;; Same as vsx_xscvspdp, but use SF as the type
1793 (define_insn "vsx_xscvspdp_scalar2"
1794   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
1795         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1796                    UNSPEC_VSX_CVSPDP))]
1797   "VECTOR_UNIT_VSX_P (V4SFmode)"
1798   "xscvspdp %x0,%x1"
1799   [(set_attr "type" "fp")])
1800
1801 ;; Generate xvcvhpsp instruction
1802 (define_insn "vsx_xvcvhpsp"
1803   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1804         (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
1805                      UNSPEC_VSX_CVHPSP))]
1806   "TARGET_P9_VECTOR"
1807   "xvcvhpsp %x0,%x1"
1808   [(set_attr "type" "vecfloat")])
1809
1810 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1811 ;; format of scalars is actually DF.
1812 (define_insn "vsx_xscvdpsp_scalar"
1813   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1814         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
1815                      UNSPEC_VSX_CVSPDP))]
1816   "VECTOR_UNIT_VSX_P (V4SFmode)"
1817   "xscvdpsp %x0,%x1"
1818   [(set_attr "type" "fp")])
1819
1820 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1821 (define_insn "vsx_xscvdpspn"
1822   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
1823         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
1824                      UNSPEC_VSX_CVDPSPN))]
1825   "TARGET_XSCVDPSPN"
1826   "xscvdpspn %x0,%x1"
1827   [(set_attr "type" "fp")])
1828
1829 (define_insn "vsx_xscvspdpn"
1830   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1831         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1832                    UNSPEC_VSX_CVSPDPN))]
1833   "TARGET_XSCVSPDPN"
1834   "xscvspdpn %x0,%x1"
1835   [(set_attr "type" "fp")])
1836
1837 (define_insn "vsx_xscvdpspn_scalar"
1838   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1839         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
1840                      UNSPEC_VSX_CVDPSPN))]
1841   "TARGET_XSCVDPSPN"
1842   "xscvdpspn %x0,%x1"
1843   [(set_attr "type" "fp")])
1844
1845 ;; Used by direct move to move a SFmode value from GPR to VSX register
1846 (define_insn "vsx_xscvspdpn_directmove"
1847   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1848         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1849                    UNSPEC_VSX_CVSPDPN))]
1850   "TARGET_XSCVSPDPN"
1851   "xscvspdpn %x0,%x1"
1852   [(set_attr "type" "fp")])
1853
1854 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
1855
1856 (define_expand "vsx_xvcvsxddp_scale"
1857   [(match_operand:V2DF 0 "vsx_register_operand" "")
1858    (match_operand:V2DI 1 "vsx_register_operand" "")
1859    (match_operand:QI 2 "immediate_operand" "")]
1860   "VECTOR_UNIT_VSX_P (V2DFmode)"
1861 {
1862   rtx op0 = operands[0];
1863   rtx op1 = operands[1];
1864   int scale = INTVAL(operands[2]);
1865   emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1866   if (scale != 0)
1867     rs6000_scale_v2df (op0, op0, -scale);
1868   DONE;
1869 })
1870
1871 (define_insn "vsx_xvcvsxddp"
1872   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1873         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1874                      UNSPEC_VSX_XVCVSXDDP))]
1875   "VECTOR_UNIT_VSX_P (V2DFmode)"
1876   "xvcvsxddp %x0,%x1"
1877   [(set_attr "type" "vecdouble")])
1878
1879 (define_expand "vsx_xvcvuxddp_scale"
1880   [(match_operand:V2DF 0 "vsx_register_operand" "")
1881    (match_operand:V2DI 1 "vsx_register_operand" "")
1882    (match_operand:QI 2 "immediate_operand" "")]
1883   "VECTOR_UNIT_VSX_P (V2DFmode)"
1884 {
1885   rtx op0 = operands[0];
1886   rtx op1 = operands[1];
1887   int scale = INTVAL(operands[2]);
1888   emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1889   if (scale != 0)
1890     rs6000_scale_v2df (op0, op0, -scale);
1891   DONE;
1892 })
1893
1894 (define_insn "vsx_xvcvuxddp"
1895   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1896         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1897                      UNSPEC_VSX_XVCVUXDDP))]
1898   "VECTOR_UNIT_VSX_P (V2DFmode)"
1899   "xvcvuxddp %x0,%x1"
1900   [(set_attr "type" "vecdouble")])
1901
1902 (define_expand "vsx_xvcvdpsxds_scale"
1903   [(match_operand:V2DI 0 "vsx_register_operand" "")
1904    (match_operand:V2DF 1 "vsx_register_operand" "")
1905    (match_operand:QI 2 "immediate_operand" "")]
1906   "VECTOR_UNIT_VSX_P (V2DFmode)"
1907 {
1908   rtx op0 = operands[0];
1909   rtx op1 = operands[1];
1910   rtx tmp;
1911   int scale = INTVAL (operands[2]);
1912   if (scale == 0)
1913     tmp = op1;
1914   else
1915     {
1916       tmp  = gen_reg_rtx (V2DFmode);
1917       rs6000_scale_v2df (tmp, op1, scale);
1918     }
1919   emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
1920   DONE;
1921 })
1922
1923 ;; convert vector of 64-bit floating point numbers to vector of
1924 ;; 64-bit signed integer
1925 (define_insn "vsx_xvcvdpsxds"
1926   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1927         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1928                      UNSPEC_VSX_XVCVDPSXDS))]
1929   "VECTOR_UNIT_VSX_P (V2DFmode)"
1930   "xvcvdpsxds %x0,%x1"
1931   [(set_attr "type" "vecdouble")])
1932
1933 ;; convert vector of 32-bit floating point numbers to vector of
1934 ;; 32-bit signed integer
1935 (define_insn "vsx_xvcvspsxws"
1936   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1937         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1938                      UNSPEC_VSX_XVCVSPSXWS))]
1939   "VECTOR_UNIT_VSX_P (V4SFmode)"
1940   "xvcvspsxws %x0,%x1"
1941   [(set_attr "type" "vecfloat")])
1942
1943 ;; convert vector of 64-bit floating point numbers to vector of
1944 ;; 64-bit unsigned integer
1945 (define_expand "vsx_xvcvdpuxds_scale"
1946   [(match_operand:V2DI 0 "vsx_register_operand" "")
1947    (match_operand:V2DF 1 "vsx_register_operand" "")
1948    (match_operand:QI 2 "immediate_operand" "")]
1949   "VECTOR_UNIT_VSX_P (V2DFmode)"
1950 {
1951   rtx op0 = operands[0];
1952   rtx op1 = operands[1];
1953   rtx tmp;
1954   int scale = INTVAL (operands[2]);
1955   if (scale == 0)
1956     tmp = op1;
1957   else
1958     {
1959       tmp = gen_reg_rtx (V2DFmode);
1960       rs6000_scale_v2df (tmp, op1, scale);
1961     }
1962   emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
1963   DONE;
1964 })
1965
1966 ;; convert vector of 32-bit floating point numbers to vector of
1967 ;; 32-bit unsigned integer
1968 (define_insn "vsx_xvcvspuxws"
1969   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1970         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1971                      UNSPEC_VSX_XVCVSPSXWS))]
1972   "VECTOR_UNIT_VSX_P (V4SFmode)"
1973   "xvcvspuxws %x0,%x1"
1974   [(set_attr "type" "vecfloat")])
1975
1976 (define_insn "vsx_xvcvdpuxds"
1977   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1978         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1979                      UNSPEC_VSX_XVCVDPUXDS))]
1980   "VECTOR_UNIT_VSX_P (V2DFmode)"
1981   "xvcvdpuxds %x0,%x1"
1982   [(set_attr "type" "vecdouble")])
1983
1984 ;; Convert from 64-bit to 32-bit types
1985 ;; Note, favor the Altivec registers since the usual use of these instructions
1986 ;; is in vector converts and we need to use the Altivec vperm instruction.
1987
1988 (define_insn "vsx_xvcvdpsxws"
1989   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1990         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1991                      UNSPEC_VSX_CVDPSXWS))]
1992   "VECTOR_UNIT_VSX_P (V2DFmode)"
1993   "xvcvdpsxws %x0,%x1"
1994   [(set_attr "type" "vecdouble")])
1995
1996 (define_insn "vsx_xvcvdpuxws"
1997   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1998         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1999                      UNSPEC_VSX_CVDPUXWS))]
2000   "VECTOR_UNIT_VSX_P (V2DFmode)"
2001   "xvcvdpuxws %x0,%x1"
2002   [(set_attr "type" "vecdouble")])
2003
2004 (define_insn "vsx_xvcvsxdsp"
2005   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2006         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2007                      UNSPEC_VSX_CVSXDSP))]
2008   "VECTOR_UNIT_VSX_P (V2DFmode)"
2009   "xvcvsxdsp %x0,%x1"
2010   [(set_attr "type" "vecfloat")])
2011
2012 (define_insn "vsx_xvcvuxdsp"
2013   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2014         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2015                      UNSPEC_VSX_CVUXDSP))]
2016   "VECTOR_UNIT_VSX_P (V2DFmode)"
2017   "xvcvuxdsp %x0,%x1"
2018   [(set_attr "type" "vecdouble")])
2019
2020 ;; Convert from 32-bit to 64-bit types
2021 ;; Provide both vector and scalar targets
2022 (define_insn "vsx_xvcvsxwdp"
2023   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2024         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2025                      UNSPEC_VSX_CVSXWDP))]
2026   "VECTOR_UNIT_VSX_P (V2DFmode)"
2027   "xvcvsxwdp %x0,%x1"
2028   [(set_attr "type" "vecdouble")])
2029
2030 (define_insn "vsx_xvcvsxwdp_df"
2031   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2032         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2033                    UNSPEC_VSX_CVSXWDP))]
2034   "TARGET_VSX"
2035   "xvcvsxwdp %x0,%x1"
2036   [(set_attr "type" "vecdouble")])
2037
2038 (define_insn "vsx_xvcvuxwdp"
2039   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2040         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2041                      UNSPEC_VSX_CVUXWDP))]
2042   "VECTOR_UNIT_VSX_P (V2DFmode)"
2043   "xvcvuxwdp %x0,%x1"
2044   [(set_attr "type" "vecdouble")])
2045
2046 (define_insn "vsx_xvcvuxwdp_df"
2047   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2048         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2049                    UNSPEC_VSX_CVUXWDP))]
2050   "TARGET_VSX"
2051   "xvcvuxwdp %x0,%x1"
2052   [(set_attr "type" "vecdouble")])
2053
2054 (define_insn "vsx_xvcvspsxds"
2055   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2056         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2057                      UNSPEC_VSX_CVSPSXDS))]
2058   "VECTOR_UNIT_VSX_P (V2DFmode)"
2059   "xvcvspsxds %x0,%x1"
2060   [(set_attr "type" "vecdouble")])
2061
2062 (define_insn "vsx_xvcvspuxds"
2063   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2064         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2065                      UNSPEC_VSX_CVSPUXDS))]
2066   "VECTOR_UNIT_VSX_P (V2DFmode)"
2067   "xvcvspuxds %x0,%x1"
2068   [(set_attr "type" "vecdouble")])
2069
2070 (define_insn "vsx_xvcvsxwsp"
2071   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2072         (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2073                      UNSPEC_VSX_CVSXWSP))]
2074   "VECTOR_UNIT_VSX_P (V4SFmode)"
2075   "xvcvsxwsp %x0,%x1"
2076   [(set_attr "type" "vecfloat")])
2077
2078 (define_insn "vsx_xvcvuxwsp"
2079   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2080         (unspec:V4SF[(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2081                     UNSPEC_VSX_CVUXWSP))]
2082   "VECTOR_UNIT_VSX_P (V4SFmode)"
2083   "xvcvuxwsp %x0,%x1"
2084   [(set_attr "type" "vecfloat")])
2085
2086 ;; Generate float2
2087 ;; convert two long long signed ints to float
2088 (define_expand "float2_v2di"
2089   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2090    (use (match_operand:V2DI 1 "register_operand" "wa"))
2091    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2092  "VECTOR_UNIT_VSX_P (V4SFmode)"
2093 {
2094   rtx rtx_src1, rtx_src2, rtx_dst;
2095
2096   rtx_dst = operands[0];
2097   rtx_src1 = operands[1];
2098   rtx_src2 = operands[2];
2099
2100   rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2101   DONE;
2102 })
2103
2104 ;; Generate uns_float2
2105 ;; convert two long long unsigned ints to float
2106 (define_expand "uns_float2_v2di"
2107   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2108    (use (match_operand:V2DI 1 "register_operand" "wa"))
2109    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2110  "VECTOR_UNIT_VSX_P (V4SFmode)"
2111 {
2112   rtx rtx_src1, rtx_src2, rtx_dst;
2113
2114   rtx_dst = operands[0];
2115   rtx_src1 = operands[1];
2116   rtx_src2 = operands[2];
2117
2118   rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2119   DONE;
2120 })
2121
2122 ;; Generate floate
2123 ;; convert  double or long long signed to float
2124 ;; (Only even words are valid, BE numbering)
2125 (define_expand "floate<mode>"
2126   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2127    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2128   "VECTOR_UNIT_VSX_P (V4SFmode)"
2129 {
2130   if (VECTOR_ELT_ORDER_BIG)
2131     {
2132       /* Shift left one word to put even word correct location */
2133       rtx rtx_tmp;
2134       rtx rtx_val = GEN_INT (4);
2135
2136       rtx_tmp = gen_reg_rtx (V4SFmode);
2137       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2138       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2139                  rtx_tmp, rtx_tmp, rtx_val));
2140     }
2141   else
2142     emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2143
2144   DONE;
2145 })
2146
2147 ;; Generate uns_floate
2148 ;; convert long long unsigned to float
2149 ;; (Only even words are valid, BE numbering)
2150 (define_expand "unsfloatev2di"
2151   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2152    (use (match_operand:V2DI 1 "register_operand" "wa"))]
2153   "VECTOR_UNIT_VSX_P (V4SFmode)"
2154 {
2155   if (VECTOR_ELT_ORDER_BIG)
2156     {
2157       /* Shift left one word to put even word correct location */
2158       rtx rtx_tmp;
2159       rtx rtx_val = GEN_INT (4);
2160
2161       rtx_tmp = gen_reg_rtx (V4SFmode);
2162       emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2163       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2164                  rtx_tmp, rtx_tmp, rtx_val));
2165     }
2166   else
2167     emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2168
2169   DONE;
2170 })
2171
2172 ;; Generate floato
2173 ;; convert double or long long signed to float
2174 ;; Only odd words are valid, BE numbering)
2175 (define_expand "floato<mode>"
2176   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2177    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2178   "VECTOR_UNIT_VSX_P (V4SFmode)"
2179 {
2180   if (VECTOR_ELT_ORDER_BIG)
2181     emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2182   else
2183     {
2184       /* Shift left one word to put odd word correct location */
2185       rtx rtx_tmp;
2186       rtx rtx_val = GEN_INT (4);
2187
2188       rtx_tmp = gen_reg_rtx (V4SFmode);
2189       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2190       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2191                  rtx_tmp, rtx_tmp, rtx_val));
2192     }
2193   DONE;
2194 })
2195
2196 ;; Generate uns_floato
2197 ;; convert long long unsigned to float
2198 ;; (Only odd words are valid, BE numbering)
2199 (define_expand "unsfloatov2di"
2200  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2201   (use (match_operand:V2DI 1 "register_operand" "wa"))]
2202  "VECTOR_UNIT_VSX_P (V4SFmode)"
2203 {
2204   if (VECTOR_ELT_ORDER_BIG)
2205     emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2206   else
2207     {
2208       /* Shift left one word to put odd word correct location */
2209       rtx rtx_tmp;
2210       rtx rtx_val = GEN_INT (4);
2211
2212       rtx_tmp = gen_reg_rtx (V4SFmode);
2213       emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2214       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2215                  rtx_tmp, rtx_tmp, rtx_val));
2216     }
2217   DONE;
2218 })
2219
2220 ;; Generate vsigned2
2221 ;; convert two double float vectors to a vector of single precision ints
2222 (define_expand "vsigned2_v2df"
2223   [(match_operand:V4SI 0 "register_operand" "=wa")
2224    (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2225                  (match_operand:V2DF 2 "register_operand" "wa")]
2226   UNSPEC_VSX_VSIGNED2)]
2227   "TARGET_VSX"
2228 {
2229   rtx rtx_src1, rtx_src2, rtx_dst;
2230   bool signed_convert=true;
2231
2232   rtx_dst = operands[0];
2233   rtx_src1 = operands[1];
2234   rtx_src2 = operands[2];
2235
2236   rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2237   DONE;
2238 })
2239
2240 ;; Generate vsignedo_v2df
2241 ;; signed double float to int convert odd word
2242 (define_expand "vsignedo_v2df"
2243   [(set (match_operand:V4SI 0 "register_operand" "=wa")
2244         (match_operand:V2DF 1 "register_operand" "wa"))]
2245   "TARGET_VSX"
2246 {
2247   if (VECTOR_ELT_ORDER_BIG)
2248     {
2249       rtx rtx_tmp;
2250       rtx rtx_val = GEN_INT (12);
2251       rtx_tmp = gen_reg_rtx (V4SImode);
2252
2253       emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2254
2255       /* Big endian word numbering for words in operand is 0 1 2 3.
2256          take (operand[1] operand[1]) and shift left one word
2257          0 1 2 3    0 1 2 3  =>  1 2 3 0
2258          Words 1 and 3 are now are now where they need to be for result.  */
2259
2260       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2261                  rtx_tmp, rtx_val));
2262     }
2263   else
2264     /* Little endian word numbering for operand is 3 2 1 0.
2265        Result words 3 and 1 are where they need to be.  */
2266     emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2267
2268   DONE;
2269 }
2270   [(set_attr "type" "veccomplex")])
2271
2272 ;; Generate vsignede_v2df
2273 ;; signed double float to int even word
2274 (define_expand "vsignede_v2df"
2275   [(set (match_operand:V4SI 0 "register_operand" "=v")
2276         (match_operand:V2DF 1 "register_operand" "v"))]
2277   "TARGET_VSX"
2278 {
2279   if (VECTOR_ELT_ORDER_BIG)
2280     /* Big endian word numbering for words in operand is 0 1
2281        Result words 0 is where they need to be.  */
2282     emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2283
2284   else
2285     {
2286       rtx rtx_tmp;
2287       rtx rtx_val = GEN_INT (12);
2288       rtx_tmp = gen_reg_rtx (V4SImode);
2289
2290       emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2291
2292       /* Little endian word numbering for operand is 3 2 1 0.
2293          take (operand[1] operand[1]) and shift left three words
2294          0 1 2 3   0 1 2 3  =>  3 0 1 2
2295          Words 0 and 2 are now where they need to be for the result.  */
2296       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2297                  rtx_tmp, rtx_val));
2298     }
2299   DONE;
2300 }
2301   [(set_attr "type" "veccomplex")])
2302
2303 ;; Generate unsigned2
2304 ;; convert two double float vectors to a vector of single precision
2305 ;; unsigned ints
2306 (define_expand "vunsigned2_v2df"
2307 [(match_operand:V4SI 0 "register_operand" "=v")
2308  (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2309                (match_operand:V2DF 2 "register_operand" "v")]
2310               UNSPEC_VSX_VSIGNED2)]
2311  "TARGET_VSX"
2312 {
2313   rtx rtx_src1, rtx_src2, rtx_dst;
2314   bool signed_convert=false;
2315
2316   rtx_dst = operands[0];
2317   rtx_src1 = operands[1];
2318   rtx_src2 = operands[2];
2319
2320   rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2321   DONE;
2322 })
2323
2324 ;; Generate vunsignedo_v2df
2325 ;; unsigned double float to int convert odd word
2326 (define_expand "vunsignedo_v2df"
2327   [(set (match_operand:V4SI 0 "register_operand" "=v")
2328         (match_operand:V2DF 1 "register_operand" "v"))]
2329   "TARGET_VSX"
2330 {
2331   if (VECTOR_ELT_ORDER_BIG)
2332     {
2333       rtx rtx_tmp;
2334       rtx rtx_val = GEN_INT (12);
2335       rtx_tmp = gen_reg_rtx (V4SImode);
2336
2337       emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2338
2339       /* Big endian word numbering for words in operand is 0 1 2 3.
2340          take (operand[1] operand[1]) and shift left one word
2341          0 1 2 3    0 1 2 3  =>  1 2 3 0
2342          Words 1 and 3 are now are now where they need to be for result.  */
2343
2344       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2345                  rtx_tmp, rtx_val));
2346     }
2347   else
2348     /* Little endian word numbering for operand is 3 2 1 0.
2349        Result words 3 and 1 are where they need to be.  */
2350     emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2351
2352   DONE;
2353 }
2354   [(set_attr "type" "veccomplex")])
2355
2356 ;; Generate vunsignede_v2df
2357 ;; unsigned double float to int even word
2358 (define_expand "vunsignede_v2df"
2359   [(set (match_operand:V4SI 0 "register_operand" "=v")
2360         (match_operand:V2DF 1 "register_operand" "v"))]
2361   "TARGET_VSX"
2362 {
2363   if (VECTOR_ELT_ORDER_BIG)
2364     /* Big endian word numbering for words in operand is 0 1
2365        Result words 0 is where they need to be.  */
2366     emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2367
2368   else
2369     {
2370       rtx rtx_tmp;
2371       rtx rtx_val = GEN_INT (12);
2372       rtx_tmp = gen_reg_rtx (V4SImode);
2373
2374       emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2375
2376       /* Little endian word numbering for operand is 3 2 1 0.
2377          take (operand[1] operand[1]) and shift left three words
2378          0 1 2 3   0 1 2 3  =>  3 0 1 2
2379          Words 0 and 2 are now where they need to be for the result.  */
2380       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2381                  rtx_tmp, rtx_val));
2382     }
2383   DONE;
2384 }
2385   [(set_attr "type" "veccomplex")])
2386
2387 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2388 ;; since the xvrdpiz instruction does not truncate the value if the floating
2389 ;; point value is < LONG_MIN or > LONG_MAX.
2390 (define_insn "*vsx_float_fix_v2df2"
2391   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2392         (float:V2DF
2393          (fix:V2DI
2394           (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2395   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
2396    && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2397    && !flag_trapping_math && TARGET_FRIZ"
2398   "xvrdpiz %x0,%x1"
2399   [(set_attr "type" "vecdouble")
2400    (set_attr "fp_type" "fp_addsub_d")])
2401
2402 \f
2403 ;; Permute operations
2404
2405 ;; Build a V2DF/V2DI vector from two scalars
2406 (define_insn "vsx_concat_<mode>"
2407   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2408         (vec_concat:VSX_D
2409          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2410          (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2411   "VECTOR_MEM_VSX_P (<MODE>mode)"
2412 {
2413   if (which_alternative == 0)
2414     return (BYTES_BIG_ENDIAN
2415             ? "xxpermdi %x0,%x1,%x2,0"
2416             : "xxpermdi %x0,%x2,%x1,0");
2417
2418   else if (which_alternative == 1)
2419     return (BYTES_BIG_ENDIAN
2420             ? "mtvsrdd %x0,%1,%2"
2421             : "mtvsrdd %x0,%2,%1");
2422
2423   else
2424     gcc_unreachable ();
2425 }
2426   [(set_attr "type" "vecperm")])
2427
2428 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2429 ;; word element in a vector register.
2430 (define_insn "*vsx_concat_<mode>_1"
2431   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2432         (vec_concat:VSX_D
2433          (vec_select:<VS_scalar>
2434           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2435           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2436          (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2437   "VECTOR_MEM_VSX_P (<MODE>mode)"
2438 {
2439   HOST_WIDE_INT dword = INTVAL (operands[2]);
2440   if (BYTES_BIG_ENDIAN)
2441     {
2442       operands[4] = GEN_INT (2*dword);
2443       return "xxpermdi %x0,%x1,%x3,%4";
2444     }
2445   else
2446     {
2447       operands[4] = GEN_INT (!dword);
2448       return "xxpermdi %x0,%x3,%x1,%4";
2449     }
2450 }
2451   [(set_attr "type" "vecperm")])
2452
2453 (define_insn "*vsx_concat_<mode>_2"
2454   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2455         (vec_concat:VSX_D
2456          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2457          (vec_select:<VS_scalar>
2458           (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2459           (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2460   "VECTOR_MEM_VSX_P (<MODE>mode)"
2461 {
2462   HOST_WIDE_INT dword = INTVAL (operands[3]);
2463   if (BYTES_BIG_ENDIAN)
2464     {
2465       operands[4] = GEN_INT (dword);
2466       return "xxpermdi %x0,%x1,%x2,%4";
2467     }
2468   else
2469     {
2470       operands[4] = GEN_INT (2 * !dword);
2471       return "xxpermdi %x0,%x2,%x1,%4";
2472     }
2473 }
2474   [(set_attr "type" "vecperm")])
2475
2476 (define_insn "*vsx_concat_<mode>_3"
2477   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2478         (vec_concat:VSX_D
2479          (vec_select:<VS_scalar>
2480           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2481           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2482          (vec_select:<VS_scalar>
2483           (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2484           (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2485   "VECTOR_MEM_VSX_P (<MODE>mode)"
2486 {
2487   HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2488   HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2489   if (BYTES_BIG_ENDIAN)
2490     {
2491       operands[5] = GEN_INT ((2 * dword1) + dword2);
2492       return "xxpermdi %x0,%x1,%x3,%5";
2493     }
2494   else
2495     {
2496       operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2497       return "xxpermdi %x0,%x3,%x1,%5";
2498     }
2499 }
2500   [(set_attr "type" "vecperm")])
2501
2502 ;; Special purpose concat using xxpermdi to glue two single precision values
2503 ;; together, relying on the fact that internally scalar floats are represented
2504 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
2505 (define_insn "vsx_concat_v2sf"
2506   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2507         (unspec:V2DF
2508          [(match_operand:SF 1 "vsx_register_operand" "ww")
2509           (match_operand:SF 2 "vsx_register_operand" "ww")]
2510          UNSPEC_VSX_CONCAT))]
2511   "VECTOR_MEM_VSX_P (V2DFmode)"
2512 {
2513   if (BYTES_BIG_ENDIAN)
2514     return "xxpermdi %x0,%x1,%x2,0";
2515   else
2516     return "xxpermdi %x0,%x2,%x1,0";
2517 }
2518   [(set_attr "type" "vecperm")])
2519
2520 ;; V4SImode initialization splitter
2521 (define_insn_and_split "vsx_init_v4si"
2522   [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2523         (unspec:V4SI
2524          [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2525           (match_operand:SI 2 "reg_or_cint_operand" "rn")
2526           (match_operand:SI 3 "reg_or_cint_operand" "rn")
2527           (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2528          UNSPEC_VSX_VEC_INIT))
2529    (clobber (match_scratch:DI 5 "=&r"))
2530    (clobber (match_scratch:DI 6 "=&r"))]
2531    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2532    "#"
2533    "&& reload_completed"
2534    [(const_int 0)]
2535 {
2536   rs6000_split_v4si_init (operands);
2537   DONE;
2538 })
2539
2540 ;; xxpermdi for little endian loads and stores.  We need several of
2541 ;; these since the form of the PARALLEL differs by mode.
2542 (define_insn "*vsx_xxpermdi2_le_<mode>"
2543   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2544         (vec_select:VSX_D
2545           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2546           (parallel [(const_int 1) (const_int 0)])))]
2547   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2548   "xxpermdi %x0,%x1,%x1,2"
2549   [(set_attr "type" "vecperm")])
2550
2551 (define_insn "*vsx_xxpermdi4_le_<mode>"
2552   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2553         (vec_select:VSX_W
2554           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2555           (parallel [(const_int 2) (const_int 3)
2556                      (const_int 0) (const_int 1)])))]
2557   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2558   "xxpermdi %x0,%x1,%x1,2"
2559   [(set_attr "type" "vecperm")])
2560
2561 (define_insn "*vsx_xxpermdi8_le_V8HI"
2562   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2563         (vec_select:V8HI
2564           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2565           (parallel [(const_int 4) (const_int 5)
2566                      (const_int 6) (const_int 7)
2567                      (const_int 0) (const_int 1)
2568                      (const_int 2) (const_int 3)])))]
2569   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2570   "xxpermdi %x0,%x1,%x1,2"
2571   [(set_attr "type" "vecperm")])
2572
2573 (define_insn "*vsx_xxpermdi16_le_V16QI"
2574   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2575         (vec_select:V16QI
2576           (match_operand:V16QI 1 "vsx_register_operand" "wa")
2577           (parallel [(const_int 8) (const_int 9)
2578                      (const_int 10) (const_int 11)
2579                      (const_int 12) (const_int 13)
2580                      (const_int 14) (const_int 15)
2581                      (const_int 0) (const_int 1)
2582                      (const_int 2) (const_int 3)
2583                      (const_int 4) (const_int 5)
2584                      (const_int 6) (const_int 7)])))]
2585   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
2586   "xxpermdi %x0,%x1,%x1,2"
2587   [(set_attr "type" "vecperm")])
2588
2589 ;; lxvd2x for little endian loads.  We need several of
2590 ;; these since the form of the PARALLEL differs by mode.
2591 (define_insn "*vsx_lxvd2x2_le_<mode>"
2592   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2593         (vec_select:VSX_D
2594           (match_operand:VSX_D 1 "memory_operand" "Z")
2595           (parallel [(const_int 1) (const_int 0)])))]
2596   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2597   "lxvd2x %x0,%y1"
2598   [(set_attr "type" "vecload")])
2599
2600 (define_insn "*vsx_lxvd2x4_le_<mode>"
2601   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2602         (vec_select:VSX_W
2603           (match_operand:VSX_W 1 "memory_operand" "Z")
2604           (parallel [(const_int 2) (const_int 3)
2605                      (const_int 0) (const_int 1)])))]
2606   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2607   "lxvd2x %x0,%y1"
2608   [(set_attr "type" "vecload")])
2609
2610 (define_insn "*vsx_lxvd2x8_le_V8HI"
2611   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2612         (vec_select:V8HI
2613           (match_operand:V8HI 1 "memory_operand" "Z")
2614           (parallel [(const_int 4) (const_int 5)
2615                      (const_int 6) (const_int 7)
2616                      (const_int 0) (const_int 1)
2617                      (const_int 2) (const_int 3)])))]
2618   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2619   "lxvd2x %x0,%y1"
2620   [(set_attr "type" "vecload")])
2621
2622 (define_insn "*vsx_lxvd2x16_le_V16QI"
2623   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2624         (vec_select:V16QI
2625           (match_operand:V16QI 1 "memory_operand" "Z")
2626           (parallel [(const_int 8) (const_int 9)
2627                      (const_int 10) (const_int 11)
2628                      (const_int 12) (const_int 13)
2629                      (const_int 14) (const_int 15)
2630                      (const_int 0) (const_int 1)
2631                      (const_int 2) (const_int 3)
2632                      (const_int 4) (const_int 5)
2633                      (const_int 6) (const_int 7)])))]
2634   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2635   "lxvd2x %x0,%y1"
2636   [(set_attr "type" "vecload")])
2637
2638 ;; stxvd2x for little endian stores.  We need several of
2639 ;; these since the form of the PARALLEL differs by mode.
2640 (define_insn "*vsx_stxvd2x2_le_<mode>"
2641   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
2642         (vec_select:VSX_D
2643           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2644           (parallel [(const_int 1) (const_int 0)])))]
2645   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2646   "stxvd2x %x1,%y0"
2647   [(set_attr "type" "vecstore")])
2648
2649 (define_insn "*vsx_stxvd2x4_le_<mode>"
2650   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
2651         (vec_select:VSX_W
2652           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2653           (parallel [(const_int 2) (const_int 3)
2654                      (const_int 0) (const_int 1)])))]
2655   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2656   "stxvd2x %x1,%y0"
2657   [(set_attr "type" "vecstore")])
2658
2659 (define_insn "*vsx_stxvd2x8_le_V8HI"
2660   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
2661         (vec_select:V8HI
2662           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2663           (parallel [(const_int 4) (const_int 5)
2664                      (const_int 6) (const_int 7)
2665                      (const_int 0) (const_int 1)
2666                      (const_int 2) (const_int 3)])))]
2667   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2668   "stxvd2x %x1,%y0"
2669   [(set_attr "type" "vecstore")])
2670
2671 (define_insn "*vsx_stxvd2x16_le_V16QI"
2672   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
2673         (vec_select:V16QI
2674           (match_operand:V16QI 1 "vsx_register_operand" "wa")
2675           (parallel [(const_int 8) (const_int 9)
2676                      (const_int 10) (const_int 11)
2677                      (const_int 12) (const_int 13)
2678                      (const_int 14) (const_int 15)
2679                      (const_int 0) (const_int 1)
2680                      (const_int 2) (const_int 3)
2681                      (const_int 4) (const_int 5)
2682                      (const_int 6) (const_int 7)])))]
2683   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2684   "stxvd2x %x1,%y0"
2685   [(set_attr "type" "vecstore")])
2686
2687 ;; Convert a TImode value into V1TImode
2688 (define_expand "vsx_set_v1ti"
2689   [(match_operand:V1TI 0 "nonimmediate_operand" "")
2690    (match_operand:V1TI 1 "nonimmediate_operand" "")
2691    (match_operand:TI 2 "input_operand" "")
2692    (match_operand:QI 3 "u5bit_cint_operand" "")]
2693   "VECTOR_MEM_VSX_P (V1TImode)"
2694 {
2695   if (operands[3] != const0_rtx)
2696     gcc_unreachable ();
2697
2698   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
2699   DONE;
2700 })
2701
2702 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
2703 (define_expand "vsx_set_<mode>"
2704   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
2705    (use (match_operand:VSX_D 1 "vsx_register_operand"))
2706    (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
2707    (use (match_operand:QI 3 "const_0_to_1_operand"))]
2708   "VECTOR_MEM_VSX_P (<MODE>mode)"
2709 {
2710   rtx dest = operands[0];
2711   rtx vec_reg = operands[1];
2712   rtx value = operands[2];
2713   rtx ele = operands[3];
2714   rtx tmp = gen_reg_rtx (<VS_scalar>mode);
2715
2716   if (ele == const0_rtx)
2717     {
2718       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
2719       emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
2720       DONE;
2721     }
2722   else if (ele == const1_rtx)
2723     {
2724       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
2725       emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
2726       DONE;
2727     }
2728   else
2729     gcc_unreachable ();
2730 })
2731
2732 ;; Extract a DF/DI element from V2DF/V2DI
2733 ;; Optimize cases were we can do a simple or direct move.
2734 ;; Or see if we can avoid doing the move at all
2735
2736 ;; There are some unresolved problems with reload that show up if an Altivec
2737 ;; register was picked.  Limit the scalar value to FPRs for now.
2738
2739 (define_insn "vsx_extract_<mode>"
2740   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
2741
2742         (vec_select:<VS_scalar>
2743          (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
2744
2745          (parallel
2746           [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
2747   "VECTOR_MEM_VSX_P (<MODE>mode)"
2748 {
2749   int element = INTVAL (operands[2]);
2750   int op0_regno = REGNO (operands[0]);
2751   int op1_regno = REGNO (operands[1]);
2752   int fldDM;
2753
2754   gcc_assert (IN_RANGE (element, 0, 1));
2755   gcc_assert (VSX_REGNO_P (op1_regno));
2756
2757   if (element == VECTOR_ELEMENT_SCALAR_64BIT)
2758     {
2759       if (op0_regno == op1_regno)
2760         return ASM_COMMENT_START " vec_extract to same register";
2761
2762       else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
2763                && TARGET_POWERPC64)
2764         return "mfvsrd %0,%x1";
2765
2766       else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
2767         return "fmr %0,%1";
2768
2769       else if (VSX_REGNO_P (op0_regno))
2770         return "xxlor %x0,%x1,%x1";
2771
2772       else
2773         gcc_unreachable ();
2774     }
2775
2776   else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
2777            && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
2778     return "mfvsrld %0,%x1";
2779
2780   else if (VSX_REGNO_P (op0_regno))
2781     {
2782       fldDM = element << 1;
2783       if (!BYTES_BIG_ENDIAN)
2784         fldDM = 3 - fldDM;
2785       operands[3] = GEN_INT (fldDM);
2786       return "xxpermdi %x0,%x1,%x1,%3";
2787     }
2788
2789   else
2790     gcc_unreachable ();
2791 }
2792   [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
2793
2794 ;; Optimize extracting a single scalar element from memory.
2795 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
2796   [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
2797         (vec_select:<VSX_D:VS_scalar>
2798          (match_operand:VSX_D 1 "memory_operand" "m,m")
2799          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
2800    (clobber (match_scratch:P 3 "=&b,&b"))]
2801   "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
2802   "#"
2803   "&& reload_completed"
2804   [(set (match_dup 0) (match_dup 4))]
2805 {
2806   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2807                                            operands[3], <VSX_D:VS_scalar>mode);
2808 }
2809   [(set_attr "type" "fpload,load")
2810    (set_attr "length" "8")])
2811
2812 ;; Optimize storing a single scalar element that is the right location to
2813 ;; memory
2814 (define_insn "*vsx_extract_<mode>_store"
2815   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
2816         (vec_select:<VS_scalar>
2817          (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
2818          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2819   "VECTOR_MEM_VSX_P (<MODE>mode)"
2820   "@
2821    stfd%U0%X0 %1,%0
2822    stxsd%U0x %x1,%y0
2823    stxsd %1,%0"
2824   [(set_attr "type" "fpstore")
2825    (set_attr "length" "4")])
2826
2827 ;; Variable V2DI/V2DF extract shift
2828 (define_insn "vsx_vslo_<mode>"
2829   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
2830         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
2831                              (match_operand:V2DI 2 "gpc_reg_operand" "v")]
2832                             UNSPEC_VSX_VSLO))]
2833   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2834   "vslo %0,%1,%2"
2835   [(set_attr "type" "vecperm")])
2836
2837 ;; Variable V2DI/V2DF extract
2838 (define_insn_and_split "vsx_extract_<mode>_var"
2839   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
2840         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
2841                              (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2842                             UNSPEC_VSX_EXTRACT))
2843    (clobber (match_scratch:DI 3 "=r,&b,&b"))
2844    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2845   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2846   "#"
2847   "&& reload_completed"
2848   [(const_int 0)]
2849 {
2850   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2851                                 operands[3], operands[4]);
2852   DONE;
2853 })
2854
2855 ;; Extract a SF element from V4SF
2856 (define_insn_and_split "vsx_extract_v4sf"
2857   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2858         (vec_select:SF
2859          (match_operand:V4SF 1 "vsx_register_operand" "wa")
2860          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
2861    (clobber (match_scratch:V4SF 3 "=0"))]
2862   "VECTOR_UNIT_VSX_P (V4SFmode)"
2863   "#"
2864   "&& 1"
2865   [(const_int 0)]
2866 {
2867   rtx op0 = operands[0];
2868   rtx op1 = operands[1];
2869   rtx op2 = operands[2];
2870   rtx op3 = operands[3];
2871   rtx tmp;
2872   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
2873
2874   if (ele == 0)
2875     tmp = op1;
2876   else
2877     {
2878       if (GET_CODE (op3) == SCRATCH)
2879         op3 = gen_reg_rtx (V4SFmode);
2880       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
2881       tmp = op3;
2882     }
2883   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
2884   DONE;
2885 }
2886   [(set_attr "length" "8")
2887    (set_attr "type" "fp")])
2888
2889 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
2890   [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
2891         (vec_select:SF
2892          (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
2893          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
2894    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
2895   "VECTOR_MEM_VSX_P (V4SFmode)"
2896   "#"
2897   "&& reload_completed"
2898   [(set (match_dup 0) (match_dup 4))]
2899 {
2900   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2901                                            operands[3], SFmode);
2902 }
2903   [(set_attr "type" "fpload,fpload,fpload,load")
2904    (set_attr "length" "8")])
2905
2906 ;; Variable V4SF extract
2907 (define_insn_and_split "vsx_extract_v4sf_var"
2908   [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
2909         (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
2910                     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2911                    UNSPEC_VSX_EXTRACT))
2912    (clobber (match_scratch:DI 3 "=r,&b,&b"))
2913    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2914   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
2915   "#"
2916   "&& reload_completed"
2917   [(const_int 0)]
2918 {
2919   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2920                                 operands[3], operands[4]);
2921   DONE;
2922 })
2923
2924 ;; Expand the builtin form of xxpermdi to canonical rtl.
2925 (define_expand "vsx_xxpermdi_<mode>"
2926   [(match_operand:VSX_L 0 "vsx_register_operand")
2927    (match_operand:VSX_L 1 "vsx_register_operand")
2928    (match_operand:VSX_L 2 "vsx_register_operand")
2929    (match_operand:QI 3 "u5bit_cint_operand")]
2930   "VECTOR_MEM_VSX_P (<MODE>mode)"
2931 {
2932   rtx target = operands[0];
2933   rtx op0 = operands[1];
2934   rtx op1 = operands[2];
2935   int mask = INTVAL (operands[3]);
2936   rtx perm0 = GEN_INT ((mask >> 1) & 1);
2937   rtx perm1 = GEN_INT ((mask & 1) + 2);
2938   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2939
2940   if (<MODE>mode == V2DFmode)
2941     gen = gen_vsx_xxpermdi2_v2df_1;
2942   else
2943     {
2944       gen = gen_vsx_xxpermdi2_v2di_1;
2945       if (<MODE>mode != V2DImode)
2946         {
2947           target = gen_lowpart (V2DImode, target);
2948           op0 = gen_lowpart (V2DImode, op0);
2949           op1 = gen_lowpart (V2DImode, op1);
2950         }
2951     }
2952   emit_insn (gen (target, op0, op1, perm0, perm1));
2953   DONE;
2954 })
2955
2956 ;; Special version of xxpermdi that retains big-endian semantics.
2957 (define_expand "vsx_xxpermdi_<mode>_be"
2958   [(match_operand:VSX_L 0 "vsx_register_operand")
2959    (match_operand:VSX_L 1 "vsx_register_operand")
2960    (match_operand:VSX_L 2 "vsx_register_operand")
2961    (match_operand:QI 3 "u5bit_cint_operand")]
2962   "VECTOR_MEM_VSX_P (<MODE>mode)"
2963 {
2964   rtx target = operands[0];
2965   rtx op0 = operands[1];
2966   rtx op1 = operands[2];
2967   int mask = INTVAL (operands[3]);
2968   rtx perm0 = GEN_INT ((mask >> 1) & 1);
2969   rtx perm1 = GEN_INT ((mask & 1) + 2);
2970   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2971
2972   if (<MODE>mode == V2DFmode)
2973     gen = gen_vsx_xxpermdi2_v2df_1;
2974   else
2975     {
2976       gen = gen_vsx_xxpermdi2_v2di_1;
2977       if (<MODE>mode != V2DImode)
2978         {
2979           target = gen_lowpart (V2DImode, target);
2980           op0 = gen_lowpart (V2DImode, op0);
2981           op1 = gen_lowpart (V2DImode, op1);
2982         }
2983     }
2984   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2985      transformation we don't want; it is necessary for
2986      rs6000_expand_vec_perm_const_1 but not for this use.  So we
2987      prepare for that by reversing the transformation here.  */
2988   if (BYTES_BIG_ENDIAN)
2989     emit_insn (gen (target, op0, op1, perm0, perm1));
2990   else
2991     {
2992       rtx p0 = GEN_INT (3 - INTVAL (perm1));
2993       rtx p1 = GEN_INT (3 - INTVAL (perm0));
2994       emit_insn (gen (target, op1, op0, p0, p1));
2995     }
2996   DONE;
2997 })
2998
2999 (define_insn "vsx_xxpermdi2_<mode>_1"
3000   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3001         (vec_select:VSX_D
3002           (vec_concat:<VS_double>
3003             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3004             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3005           (parallel [(match_operand 3 "const_0_to_1_operand" "")
3006                      (match_operand 4 "const_2_to_3_operand" "")])))]
3007   "VECTOR_MEM_VSX_P (<MODE>mode)"
3008 {
3009   int op3, op4, mask;
3010
3011   /* For little endian, swap operands and invert/swap selectors
3012      to get the correct xxpermdi.  The operand swap sets up the
3013      inputs as a little endian array.  The selectors are swapped
3014      because they are defined to use big endian ordering.  The
3015      selectors are inverted to get the correct doublewords for
3016      little endian ordering.  */
3017   if (BYTES_BIG_ENDIAN)
3018     {
3019       op3 = INTVAL (operands[3]);
3020       op4 = INTVAL (operands[4]);
3021     }
3022   else
3023     {
3024       op3 = 3 - INTVAL (operands[4]);
3025       op4 = 3 - INTVAL (operands[3]);
3026     }
3027
3028   mask = (op3 << 1) | (op4 - 2);
3029   operands[3] = GEN_INT (mask);
3030
3031   if (BYTES_BIG_ENDIAN)
3032     return "xxpermdi %x0,%x1,%x2,%3";
3033   else
3034     return "xxpermdi %x0,%x2,%x1,%3";
3035 }
3036   [(set_attr "type" "vecperm")])
3037
3038 (define_expand "vec_perm_const<mode>"
3039   [(match_operand:VSX_D 0 "vsx_register_operand" "")
3040    (match_operand:VSX_D 1 "vsx_register_operand" "")
3041    (match_operand:VSX_D 2 "vsx_register_operand" "")
3042    (match_operand:V2DI  3 "" "")]
3043   "VECTOR_MEM_VSX_P (<MODE>mode)"
3044 {
3045   if (rs6000_expand_vec_perm_const (operands))
3046     DONE;
3047   else
3048     FAIL;
3049 })
3050
3051 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
3052 ;; none of the small types were allowed in a vector register, so we had to
3053 ;; extract to a DImode and either do a direct move or store.
3054 (define_expand  "vsx_extract_<mode>"
3055   [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3056                    (vec_select:<VS_scalar>
3057                     (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3058                     (parallel [(match_operand:QI 2 "const_int_operand")])))
3059               (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3060   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3061 {
3062   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
3063   if (TARGET_P9_VECTOR)
3064     {
3065       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3066                                             operands[2]));
3067       DONE;
3068     }
3069 })
3070
3071 (define_insn "vsx_extract_<mode>_p9"
3072   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3073         (vec_select:<VS_scalar>
3074          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3075          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3076    (clobber (match_scratch:SI 3 "=r,X"))]
3077   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3078 {
3079   if (which_alternative == 0)
3080     return "#";
3081
3082   else
3083     {
3084       HOST_WIDE_INT elt = INTVAL (operands[2]);
3085       HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
3086                                ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3087                                : elt);
3088
3089       HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3090       HOST_WIDE_INT offset = unit_size * elt_adj;
3091
3092       operands[2] = GEN_INT (offset);
3093       if (unit_size == 4)
3094         return "xxextractuw %x0,%x1,%2";
3095       else
3096         return "vextractu<wd> %0,%1,%2";
3097     }
3098 }
3099   [(set_attr "type" "vecsimple")])
3100
3101 (define_split
3102   [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3103         (vec_select:<VS_scalar>
3104          (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3105          (parallel [(match_operand:QI 2 "const_int_operand")])))
3106    (clobber (match_operand:SI 3 "int_reg_operand"))]
3107   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3108   [(const_int 0)]
3109 {
3110   rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3111   rtx op1 = operands[1];
3112   rtx op2 = operands[2];
3113   rtx op3 = operands[3];
3114   HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3115
3116   emit_move_insn (op3, GEN_INT (offset));
3117   if (VECTOR_ELT_ORDER_BIG)
3118     emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3119   else
3120     emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3121   DONE;
3122 })
3123
3124 ;; Optimize zero extracts to eliminate the AND after the extract.
3125 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3126   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3127         (zero_extend:DI
3128          (vec_select:<VS_scalar>
3129           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3130           (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3131    (clobber (match_scratch:SI 3 "=r,X"))]
3132   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3133   "#"
3134   "&& reload_completed"
3135   [(parallel [(set (match_dup 4)
3136                    (vec_select:<VS_scalar>
3137                     (match_dup 1)
3138                     (parallel [(match_dup 2)])))
3139               (clobber (match_dup 3))])]
3140 {
3141   operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3142 })
3143
3144 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3145 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3146   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3147         (vec_select:<VS_scalar>
3148          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3149          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3150    (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3151    (clobber (match_scratch:SI 4 "=X,&r"))]
3152   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3153   "#"
3154   "&& reload_completed"
3155   [(parallel [(set (match_dup 3)
3156                    (vec_select:<VS_scalar>
3157                     (match_dup 1)
3158                     (parallel [(match_dup 2)])))
3159               (clobber (match_dup 4))])
3160    (set (match_dup 0)
3161         (match_dup 3))])
3162
3163 (define_insn_and_split  "*vsx_extract_si"
3164   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3165         (vec_select:SI
3166          (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3167          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3168    (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3169   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3170   "#"
3171   "&& reload_completed"
3172   [(const_int 0)]
3173 {
3174   rtx dest = operands[0];
3175   rtx src = operands[1];
3176   rtx element = operands[2];
3177   rtx vec_tmp = operands[3];
3178   int value;
3179
3180   if (!VECTOR_ELT_ORDER_BIG)
3181     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3182
3183   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3184      instruction.  */
3185   value = INTVAL (element);
3186   if (value != 1)
3187     emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3188   else
3189     vec_tmp = src;
3190
3191   if (MEM_P (operands[0]))
3192     {
3193       if (can_create_pseudo_p ())
3194         dest = rs6000_address_for_fpconvert (dest);
3195
3196       if (TARGET_P8_VECTOR)
3197         emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3198       else
3199         emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3200     }
3201
3202   else if (TARGET_P8_VECTOR)
3203     emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3204   else
3205     emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3206                     gen_rtx_REG (DImode, REGNO (vec_tmp)));
3207
3208   DONE;
3209 }
3210   [(set_attr "type" "mftgpr,vecperm,fpstore")
3211    (set_attr "length" "8")])
3212
3213 (define_insn_and_split  "*vsx_extract_<mode>_p8"
3214   [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3215         (vec_select:<VS_scalar>
3216          (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3217          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3218    (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3219   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3220    && !TARGET_P9_VECTOR"
3221   "#"
3222   "&& reload_completed"
3223   [(const_int 0)]
3224 {
3225   rtx dest = operands[0];
3226   rtx src = operands[1];
3227   rtx element = operands[2];
3228   rtx vec_tmp = operands[3];
3229   int value;
3230
3231   if (!VECTOR_ELT_ORDER_BIG)
3232     element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3233
3234   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3235      instruction.  */
3236   value = INTVAL (element);
3237   if (<MODE>mode == V16QImode)
3238     {
3239       if (value != 7)
3240         emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3241       else
3242         vec_tmp = src;
3243     }
3244   else if (<MODE>mode == V8HImode)
3245     {
3246       if (value != 3)
3247         emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3248       else
3249         vec_tmp = src;
3250     }
3251   else
3252     gcc_unreachable ();
3253
3254   emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3255                   gen_rtx_REG (DImode, REGNO (vec_tmp)));
3256   DONE;
3257 }
3258   [(set_attr "type" "mftgpr")])
3259
3260 ;; Optimize extracting a single scalar element from memory.
3261 (define_insn_and_split "*vsx_extract_<mode>_load"
3262   [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3263         (vec_select:<VS_scalar>
3264          (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3265          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3266    (clobber (match_scratch:DI 3 "=&b"))]
3267   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3268   "#"
3269   "&& reload_completed"
3270   [(set (match_dup 0) (match_dup 4))]
3271 {
3272   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3273                                            operands[3], <VS_scalar>mode);
3274 }
3275   [(set_attr "type" "load")
3276    (set_attr "length" "8")])
3277
3278 ;; Variable V16QI/V8HI/V4SI extract
3279 (define_insn_and_split "vsx_extract_<mode>_var"
3280   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3281         (unspec:<VS_scalar>
3282          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3283           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3284          UNSPEC_VSX_EXTRACT))
3285    (clobber (match_scratch:DI 3 "=r,r,&b"))
3286    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3287   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3288   "#"
3289   "&& reload_completed"
3290   [(const_int 0)]
3291 {
3292   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3293                                 operands[3], operands[4]);
3294   DONE;
3295 })
3296
3297 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
3298   [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3299         (zero_extend:SDI
3300          (unspec:<VSX_EXTRACT_I:VS_scalar>
3301           [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3302            (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3303           UNSPEC_VSX_EXTRACT)))
3304    (clobber (match_scratch:DI 3 "=r,r,&b"))
3305    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3306   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3307   "#"
3308   "&& reload_completed"
3309   [(const_int 0)]
3310 {
3311   machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
3312   rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3313                                 operands[1], operands[2],
3314                                 operands[3], operands[4]);
3315   DONE;
3316 })
3317
3318 ;; VSX_EXTRACT optimizations
3319 ;; Optimize double d = (double) vec_extract (vi, <n>)
3320 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3321 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3322   [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3323         (any_float:DF
3324          (vec_select:SI
3325           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3326           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3327    (clobber (match_scratch:V4SI 3 "=v"))]
3328   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3329   "#"
3330   "&& 1"
3331   [(const_int 0)]
3332 {
3333   rtx dest = operands[0];
3334   rtx src = operands[1];
3335   rtx element = operands[2];
3336   rtx v4si_tmp = operands[3];
3337   int value;
3338
3339   if (!VECTOR_ELT_ORDER_BIG)
3340     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3341
3342   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3343      instruction.  */
3344   value = INTVAL (element);
3345   if (value != 0)
3346     {
3347       if (GET_CODE (v4si_tmp) == SCRATCH)
3348         v4si_tmp = gen_reg_rtx (V4SImode);
3349       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3350     }
3351   else
3352     v4si_tmp = src;
3353
3354   emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3355   DONE;
3356 })
3357
3358 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3359 ;; where <type> is a floating point type that supported by the hardware that is
3360 ;; not double.  First convert the value to double, and then to the desired
3361 ;; type.
3362 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3363   [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3364         (any_float:VSX_EXTRACT_FL
3365          (vec_select:SI
3366           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3367           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3368    (clobber (match_scratch:V4SI 3 "=v"))
3369    (clobber (match_scratch:DF 4 "=ws"))]
3370   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3371   "#"
3372   "&& 1"
3373   [(const_int 0)]
3374 {
3375   rtx dest = operands[0];
3376   rtx src = operands[1];
3377   rtx element = operands[2];
3378   rtx v4si_tmp = operands[3];
3379   rtx df_tmp = operands[4];
3380   int value;
3381
3382   if (!VECTOR_ELT_ORDER_BIG)
3383     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3384
3385   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3386      instruction.  */
3387   value = INTVAL (element);
3388   if (value != 0)
3389     {
3390       if (GET_CODE (v4si_tmp) == SCRATCH)
3391         v4si_tmp = gen_reg_rtx (V4SImode);
3392       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3393     }
3394   else
3395     v4si_tmp = src;
3396
3397   if (GET_CODE (df_tmp) == SCRATCH)
3398     df_tmp = gen_reg_rtx (DFmode);
3399
3400   emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3401
3402   if (<MODE>mode == SFmode)
3403     emit_insn (gen_truncdfsf2 (dest, df_tmp));
3404   else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3405     emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3406   else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3407            && TARGET_FLOAT128_HW)
3408     emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3409   else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3410     emit_insn (gen_extenddfif2 (dest, df_tmp));
3411   else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3412     emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3413   else
3414     gcc_unreachable ();
3415
3416   DONE;
3417 })
3418
3419 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3420 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3421 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3422 ;; vector short or vector unsigned short.
3423 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3424   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3425         (float:FL_CONV
3426          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3427           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3428           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3429    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3430   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3431    && TARGET_P9_VECTOR"
3432   "#"
3433   "&& reload_completed"
3434   [(parallel [(set (match_dup 3)
3435                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3436                     (match_dup 1)
3437                     (parallel [(match_dup 2)])))
3438               (clobber (scratch:SI))])
3439    (set (match_dup 4)
3440         (sign_extend:DI (match_dup 3)))
3441    (set (match_dup 0)
3442         (float:<FL_CONV:MODE> (match_dup 4)))]
3443 {
3444   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3445 })
3446
3447 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3448   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3449         (unsigned_float:FL_CONV
3450          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3451           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3452           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3453    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3454   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3455    && TARGET_P9_VECTOR"
3456   "#"
3457   "&& reload_completed"
3458   [(parallel [(set (match_dup 3)
3459                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3460                     (match_dup 1)
3461                     (parallel [(match_dup 2)])))
3462               (clobber (scratch:SI))])
3463    (set (match_dup 0)
3464         (float:<FL_CONV:MODE> (match_dup 4)))]
3465 {
3466   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3467 })
3468
3469 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3470 (define_insn "vsx_set_<mode>_p9"
3471   [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3472         (unspec:VSX_EXTRACT_I
3473          [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3474           (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3475           (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3476          UNSPEC_VSX_SET))]
3477   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3478 {
3479   int ele = INTVAL (operands[3]);
3480   int nunits = GET_MODE_NUNITS (<MODE>mode);
3481
3482   if (!VECTOR_ELT_ORDER_BIG)
3483     ele = nunits - 1 - ele;
3484
3485   operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3486   if (<MODE>mode == V4SImode)
3487     return "xxinsertw %x0,%x2,%3";
3488   else
3489     return "vinsert<wd> %0,%2,%3";
3490 }
3491   [(set_attr "type" "vecperm")])
3492
3493 (define_insn_and_split "vsx_set_v4sf_p9"
3494   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3495         (unspec:V4SF
3496          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3497           (match_operand:SF 2 "gpc_reg_operand" "ww")
3498           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3499          UNSPEC_VSX_SET))
3500    (clobber (match_scratch:SI 4 "=&wJwK"))]
3501   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3502   "#"
3503   "&& reload_completed"
3504   [(set (match_dup 5)
3505         (unspec:V4SF [(match_dup 2)]
3506                      UNSPEC_VSX_CVDPSPN))
3507    (parallel [(set (match_dup 4)
3508                    (vec_select:SI (match_dup 6)
3509                                   (parallel [(match_dup 7)])))
3510               (clobber (scratch:SI))])
3511    (set (match_dup 8)
3512         (unspec:V4SI [(match_dup 8)
3513                       (match_dup 4)
3514                       (match_dup 3)]
3515                      UNSPEC_VSX_SET))]
3516 {
3517   unsigned int tmp_regno = reg_or_subregno (operands[4]);
3518
3519   operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3520   operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3521   operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
3522   operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3523 }
3524   [(set_attr "type" "vecperm")
3525    (set_attr "length" "12")])
3526
3527 ;; Special case setting 0.0f to a V4SF element
3528 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3529   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3530         (unspec:V4SF
3531          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3532           (match_operand:SF 2 "zero_fp_constant" "j")
3533           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3534          UNSPEC_VSX_SET))
3535    (clobber (match_scratch:SI 4 "=&wJwK"))]
3536   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3537   "#"
3538   "&& reload_completed"
3539   [(set (match_dup 4)
3540         (const_int 0))
3541    (set (match_dup 5)
3542         (unspec:V4SI [(match_dup 5)
3543                       (match_dup 4)
3544                       (match_dup 3)]
3545                      UNSPEC_VSX_SET))]
3546 {
3547   operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3548 }
3549   [(set_attr "type" "vecperm")
3550    (set_attr "length" "8")])
3551
3552 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
3553 ;; that is in the default scalar position (1 for big endian, 2 for little
3554 ;; endian).  We just need to do an xxinsertw since the element is in the
3555 ;; correct location.
3556
3557 (define_insn "*vsx_insert_extract_v4sf_p9"
3558   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3559         (unspec:V4SF
3560          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3561           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3562                          (parallel
3563                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3564           (match_operand:QI 4 "const_0_to_3_operand" "n")]
3565          UNSPEC_VSX_SET))]
3566   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
3567    && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
3568 {
3569   int ele = INTVAL (operands[4]);
3570
3571   if (!VECTOR_ELT_ORDER_BIG)
3572     ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
3573
3574   operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
3575   return "xxinsertw %x0,%x2,%4";
3576 }
3577   [(set_attr "type" "vecperm")])
3578
3579 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
3580 ;; that is in the default scalar position (1 for big endian, 2 for little
3581 ;; endian).  Convert the insert/extract to int and avoid doing the conversion.
3582
3583 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
3584   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3585         (unspec:V4SF
3586          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3587           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3588                          (parallel
3589                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3590           (match_operand:QI 4 "const_0_to_3_operand" "n")]
3591          UNSPEC_VSX_SET))
3592    (clobber (match_scratch:SI 5 "=&wJwK"))]
3593   "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
3594    && TARGET_P9_VECTOR && TARGET_POWERPC64
3595    && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
3596   "#"
3597   "&& 1"
3598   [(parallel [(set (match_dup 5)
3599                    (vec_select:SI (match_dup 6)
3600                                   (parallel [(match_dup 3)])))
3601               (clobber (scratch:SI))])
3602    (set (match_dup 7)
3603         (unspec:V4SI [(match_dup 8)
3604                       (match_dup 5)
3605                       (match_dup 4)]
3606                      UNSPEC_VSX_SET))]
3607 {
3608   if (GET_CODE (operands[5]) == SCRATCH)
3609     operands[5] = gen_reg_rtx (SImode);
3610
3611   operands[6] = gen_lowpart (V4SImode, operands[2]);
3612   operands[7] = gen_lowpart (V4SImode, operands[0]);
3613   operands[8] = gen_lowpart (V4SImode, operands[1]);
3614 }
3615   [(set_attr "type" "vecperm")])
3616
3617 ;; Expanders for builtins
3618 (define_expand "vsx_mergel_<mode>"
3619   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3620    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3621    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3622   "VECTOR_MEM_VSX_P (<MODE>mode)"
3623 {
3624   rtvec v;
3625   rtx x;
3626
3627   /* Special handling for LE with -maltivec=be.  */
3628   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
3629     {
3630       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
3631       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
3632     }
3633   else
3634     {
3635       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
3636       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
3637     }
3638
3639   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
3640   emit_insn (gen_rtx_SET (operands[0], x));
3641   DONE;
3642 })
3643
3644 (define_expand "vsx_mergeh_<mode>"
3645   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3646    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3647    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3648   "VECTOR_MEM_VSX_P (<MODE>mode)"
3649 {
3650   rtvec v;
3651   rtx x;
3652
3653   /* Special handling for LE with -maltivec=be.  */
3654   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
3655     {
3656       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
3657       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
3658     }
3659   else
3660     {
3661       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
3662       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
3663     }
3664
3665   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
3666   emit_insn (gen_rtx_SET (operands[0], x));
3667   DONE;
3668 })
3669
3670 ;; V2DF/V2DI splat
3671 ;; We separate the register splat insn from the memory splat insn to force the
3672 ;; register allocator to generate the indexed form of the SPLAT when it is
3673 ;; given an offsettable memory reference.  Otherwise, if the register and
3674 ;; memory insns were combined into a single insn, the register allocator will
3675 ;; load the value into a register, and then do a double word permute.
3676 (define_expand "vsx_splat_<mode>"
3677   [(set (match_operand:VSX_D 0 "vsx_register_operand")
3678         (vec_duplicate:VSX_D
3679          (match_operand:<VS_scalar> 1 "input_operand")))]
3680   "VECTOR_MEM_VSX_P (<MODE>mode)"
3681 {
3682   rtx op1 = operands[1];
3683   if (MEM_P (op1))
3684     operands[1] = rs6000_address_for_fpconvert (op1);
3685   else if (!REG_P (op1))
3686     op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
3687 })
3688
3689 (define_insn "vsx_splat_<mode>_reg"
3690   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
3691         (vec_duplicate:VSX_D
3692          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
3693   "VECTOR_MEM_VSX_P (<MODE>mode)"
3694   "@
3695    xxpermdi %x0,%x1,%x1,0
3696    mtvsrdd %x0,%1,%1"
3697   [(set_attr "type" "vecperm")])
3698
3699 (define_insn "vsx_splat_<VSX_D:mode>_mem"
3700   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
3701         (vec_duplicate:VSX_D
3702          (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
3703   "VECTOR_MEM_VSX_P (<MODE>mode)"
3704   "lxvdsx %x0,%y1"
3705   [(set_attr "type" "vecload")])
3706
3707 ;; V4SI splat support
3708 (define_insn "vsx_splat_v4si"
3709   [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
3710         (vec_duplicate:V4SI
3711          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
3712   "TARGET_P9_VECTOR"
3713   "@
3714    mtvsrws %x0,%1
3715    lxvwsx %x0,%y1"
3716   [(set_attr "type" "vecperm,vecload")])
3717
3718 ;; SImode is not currently allowed in vector registers.  This pattern
3719 ;; allows us to use direct move to get the value in a vector register
3720 ;; so that we can use XXSPLTW
3721 (define_insn "vsx_splat_v4si_di"
3722   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
3723         (vec_duplicate:V4SI
3724          (truncate:SI
3725           (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
3726   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3727   "@
3728    xxspltw %x0,%x1,1
3729    mtvsrws %x0,%1"
3730   [(set_attr "type" "vecperm")])
3731
3732 ;; V4SF splat (ISA 3.0)
3733 (define_insn_and_split "vsx_splat_v4sf"
3734   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
3735         (vec_duplicate:V4SF
3736          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
3737   "TARGET_P9_VECTOR"
3738   "@
3739    lxvwsx %x0,%y1
3740    #
3741    mtvsrws %x0,%1"
3742   "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
3743   [(set (match_dup 0)
3744         (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
3745    (set (match_dup 0)
3746         (unspec:V4SF [(match_dup 0)
3747                       (const_int 0)] UNSPEC_VSX_XXSPLTW))]
3748   ""
3749   [(set_attr "type" "vecload,vecperm,mftgpr")
3750    (set_attr "length" "4,8,4")])
3751
3752 ;; V4SF/V4SI splat from a vector element
3753 (define_insn "vsx_xxspltw_<mode>"
3754   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3755         (vec_duplicate:VSX_W
3756          (vec_select:<VS_scalar>
3757           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3758           (parallel
3759            [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
3760   "VECTOR_MEM_VSX_P (<MODE>mode)"
3761 {
3762   if (!BYTES_BIG_ENDIAN)
3763     operands[2] = GEN_INT (3 - INTVAL (operands[2]));
3764
3765   return "xxspltw %x0,%x1,%2";
3766 }
3767   [(set_attr "type" "vecperm")])
3768
3769 (define_insn "vsx_xxspltw_<mode>_direct"
3770   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3771         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3772                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
3773                       UNSPEC_VSX_XXSPLTW))]
3774   "VECTOR_MEM_VSX_P (<MODE>mode)"
3775   "xxspltw %x0,%x1,%2"
3776   [(set_attr "type" "vecperm")])
3777
3778 ;; V16QI/V8HI splat support on ISA 2.07
3779 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
3780   [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
3781         (vec_duplicate:VSX_SPLAT_I
3782          (truncate:<VS_scalar>
3783           (match_operand:DI 1 "altivec_register_operand" "v"))))]
3784   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3785   "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
3786   [(set_attr "type" "vecperm")])
3787
3788 ;; V2DF/V2DI splat for use by vec_splat builtin
3789 (define_insn "vsx_xxspltd_<mode>"
3790   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3791         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
3792                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
3793                       UNSPEC_VSX_XXSPLTD))]
3794   "VECTOR_MEM_VSX_P (<MODE>mode)"
3795 {
3796   if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
3797       || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
3798     return "xxpermdi %x0,%x1,%x1,0";
3799   else
3800     return "xxpermdi %x0,%x1,%x1,3";
3801 }
3802   [(set_attr "type" "vecperm")])
3803
3804 ;; V4SF/V4SI interleave
3805 (define_insn "vsx_xxmrghw_<mode>"
3806   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3807         (vec_select:VSX_W
3808           (vec_concat:<VS_double>
3809             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3810             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
3811           (parallel [(const_int 0) (const_int 4)
3812                      (const_int 1) (const_int 5)])))]
3813   "VECTOR_MEM_VSX_P (<MODE>mode)"
3814 {
3815   if (BYTES_BIG_ENDIAN)
3816     return "xxmrghw %x0,%x1,%x2";
3817   else
3818     return "xxmrglw %x0,%x2,%x1";
3819 }
3820   [(set_attr "type" "vecperm")])
3821
3822 (define_insn "vsx_xxmrglw_<mode>"
3823   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3824         (vec_select:VSX_W
3825           (vec_concat:<VS_double>
3826             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3827             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
3828           (parallel [(const_int 2) (const_int 6)
3829                      (const_int 3) (const_int 7)])))]
3830   "VECTOR_MEM_VSX_P (<MODE>mode)"
3831 {
3832   if (BYTES_BIG_ENDIAN)
3833     return "xxmrglw %x0,%x1,%x2";
3834   else
3835     return "xxmrghw %x0,%x2,%x1";
3836 }
3837   [(set_attr "type" "vecperm")])
3838
3839 ;; Shift left double by word immediate
3840 (define_insn "vsx_xxsldwi_<mode>"
3841   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
3842         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
3843                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
3844                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
3845                       UNSPEC_VSX_SLDWI))]
3846   "VECTOR_MEM_VSX_P (<MODE>mode)"
3847   "xxsldwi %x0,%x1,%x2,%3"
3848   [(set_attr "type" "vecperm")])
3849
3850 \f
3851 ;; Vector reduction insns and splitters
3852
3853 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
3854   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
3855         (VEC_reduc:V2DF
3856          (vec_concat:V2DF
3857           (vec_select:DF
3858            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3859            (parallel [(const_int 1)]))
3860           (vec_select:DF
3861            (match_dup 1)
3862            (parallel [(const_int 0)])))
3863          (match_dup 1)))
3864    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
3865   "VECTOR_UNIT_VSX_P (V2DFmode)"
3866   "#"
3867   ""
3868   [(const_int 0)]
3869   "
3870 {
3871   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
3872              ? gen_reg_rtx (V2DFmode)
3873              : operands[2];
3874   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
3875   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
3876   DONE;
3877 }"
3878   [(set_attr "length" "8")
3879    (set_attr "type" "veccomplex")])
3880
3881 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
3882   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
3883         (VEC_reduc:V4SF
3884          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3885          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
3886    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3887    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
3888   "VECTOR_UNIT_VSX_P (V4SFmode)"
3889   "#"
3890   ""
3891   [(const_int 0)]
3892   "
3893 {
3894   rtx op0 = operands[0];
3895   rtx op1 = operands[1];
3896   rtx tmp2, tmp3, tmp4;
3897
3898   if (can_create_pseudo_p ())
3899     {
3900       tmp2 = gen_reg_rtx (V4SFmode);
3901       tmp3 = gen_reg_rtx (V4SFmode);
3902       tmp4 = gen_reg_rtx (V4SFmode);
3903     }
3904   else
3905     {
3906       tmp2 = operands[2];
3907       tmp3 = operands[3];
3908       tmp4 = tmp2;
3909     }
3910
3911   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
3912   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
3913   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
3914   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
3915   DONE;
3916 }"
3917   [(set_attr "length" "16")
3918    (set_attr "type" "veccomplex")])
3919
3920 ;; Combiner patterns with the vector reduction patterns that knows we can get
3921 ;; to the top element of the V2DF array without doing an extract.
3922
3923 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
3924   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
3925         (vec_select:DF
3926          (VEC_reduc:V2DF
3927           (vec_concat:V2DF
3928            (vec_select:DF
3929             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3930             (parallel [(const_int 1)]))
3931            (vec_select:DF
3932             (match_dup 1)
3933             (parallel [(const_int 0)])))
3934           (match_dup 1))
3935          (parallel [(const_int 1)])))
3936    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
3937   "VECTOR_UNIT_VSX_P (V2DFmode)"
3938   "#"
3939   ""
3940   [(const_int 0)]
3941   "
3942 {
3943   rtx hi = gen_highpart (DFmode, operands[1]);
3944   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
3945             ? gen_reg_rtx (DFmode)
3946             : operands[2];
3947
3948   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
3949   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
3950   DONE;
3951 }"
3952   [(set_attr "length" "8")
3953    (set_attr "type" "veccomplex")])
3954
3955 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
3956   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
3957         (vec_select:SF
3958          (VEC_reduc:V4SF
3959           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3960           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
3961          (parallel [(const_int 3)])))
3962    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3963    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
3964    (clobber (match_scratch:V4SF 4 "=0,0"))]
3965   "VECTOR_UNIT_VSX_P (V4SFmode)"
3966   "#"
3967   ""
3968   [(const_int 0)]
3969   "
3970 {
3971   rtx op0 = operands[0];
3972   rtx op1 = operands[1];
3973   rtx tmp2, tmp3, tmp4, tmp5;
3974
3975   if (can_create_pseudo_p ())
3976     {
3977       tmp2 = gen_reg_rtx (V4SFmode);
3978       tmp3 = gen_reg_rtx (V4SFmode);
3979       tmp4 = gen_reg_rtx (V4SFmode);
3980       tmp5 = gen_reg_rtx (V4SFmode);
3981     }
3982   else
3983     {
3984       tmp2 = operands[2];
3985       tmp3 = operands[3];
3986       tmp4 = tmp2;
3987       tmp5 = operands[4];
3988     }
3989
3990   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
3991   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
3992   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
3993   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
3994   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
3995   DONE;
3996 }"
3997   [(set_attr "length" "20")
3998    (set_attr "type" "veccomplex")])
3999
4000 \f
4001 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
4002 (define_peephole
4003   [(set (match_operand:P 0 "base_reg_operand" "")
4004         (match_operand:P 1 "short_cint_operand" ""))
4005    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4006         (mem:VSX_M (plus:P (match_dup 0)
4007                            (match_operand:P 3 "int_reg_operand" ""))))]
4008   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4009   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
4010   [(set_attr "length" "8")
4011    (set_attr "type" "vecload")])
4012
4013 (define_peephole
4014   [(set (match_operand:P 0 "base_reg_operand" "")
4015         (match_operand:P 1 "short_cint_operand" ""))
4016    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4017         (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
4018                            (match_dup 0))))]
4019   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4020   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
4021   [(set_attr "length" "8")
4022    (set_attr "type" "vecload")])
4023
4024 \f
4025 ;; ISA 3.0 vector extend sign support
4026
4027 (define_insn "vsx_sign_extend_qi_<mode>"
4028   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4029         (unspec:VSINT_84
4030          [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4031          UNSPEC_VSX_SIGN_EXTEND))]
4032   "TARGET_P9_VECTOR"
4033   "vextsb2<wd> %0,%1"
4034   [(set_attr "type" "vecexts")])
4035
4036 (define_insn "vsx_sign_extend_hi_<mode>"
4037   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4038         (unspec:VSINT_84
4039          [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4040          UNSPEC_VSX_SIGN_EXTEND))]
4041   "TARGET_P9_VECTOR"
4042   "vextsh2<wd> %0,%1"
4043   [(set_attr "type" "vecexts")])
4044
4045 (define_insn "*vsx_sign_extend_si_v2di"
4046   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4047         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4048                      UNSPEC_VSX_SIGN_EXTEND))]
4049   "TARGET_P9_VECTOR"
4050   "vextsw2d %0,%1"
4051   [(set_attr "type" "vecexts")])
4052
4053 \f
4054 ;; ISA 3.0 Binary Floating-Point Support
4055
4056 ;; VSX Scalar Extract Exponent Quad-Precision
4057 (define_insn "xsxexpqp"
4058   [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4059         (unspec:DI [(match_operand:KF 1 "altivec_register_operand" "v")]
4060          UNSPEC_VSX_SXEXPDP))]
4061   "TARGET_P9_VECTOR"
4062   "xsxexpqp %0,%1"
4063   [(set_attr "type" "vecmove")])
4064
4065 ;; VSX Scalar Extract Exponent Double-Precision
4066 (define_insn "xsxexpdp"
4067   [(set (match_operand:DI 0 "register_operand" "=r")
4068         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4069          UNSPEC_VSX_SXEXPDP))]
4070   "TARGET_P9_VECTOR && TARGET_64BIT"
4071   "xsxexpdp %0,%x1"
4072   [(set_attr "type" "integer")])
4073
4074 ;; VSX Scalar Extract Significand Quad-Precision
4075 (define_insn "xsxsigqp"
4076   [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4077         (unspec:TI [(match_operand:KF 1 "altivec_register_operand" "v")]
4078          UNSPEC_VSX_SXSIG))]
4079   "TARGET_P9_VECTOR"
4080   "xsxsigqp %0,%1"
4081   [(set_attr "type" "vecmove")])
4082
4083 ;; VSX Scalar Extract Significand Double-Precision
4084 (define_insn "xsxsigdp"
4085   [(set (match_operand:DI 0 "register_operand" "=r")
4086         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4087          UNSPEC_VSX_SXSIG))]
4088   "TARGET_P9_VECTOR && TARGET_64BIT"
4089   "xsxsigdp %0,%x1"
4090   [(set_attr "type" "integer")])
4091
4092 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4093 (define_insn "xsiexpqpf"
4094   [(set (match_operand:KF 0 "altivec_register_operand" "=v")
4095         (unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
4096                     (match_operand:DI 2 "altivec_register_operand" "v")]
4097          UNSPEC_VSX_SIEXPQP))]
4098   "TARGET_P9_VECTOR"
4099   "xsiexpqp %0,%1,%2"
4100   [(set_attr "type" "vecmove")])
4101
4102 ;; VSX Scalar Insert Exponent Quad-Precision
4103 (define_insn "xsiexpqp"
4104   [(set (match_operand:KF 0 "altivec_register_operand" "=v")
4105         (unspec:KF [(match_operand:TI 1 "altivec_register_operand" "v")
4106                     (match_operand:DI 2 "altivec_register_operand" "v")]
4107          UNSPEC_VSX_SIEXPQP))]
4108   "TARGET_P9_VECTOR"
4109   "xsiexpqp %0,%1,%2"
4110   [(set_attr "type" "vecmove")])
4111
4112 ;; VSX Scalar Insert Exponent Double-Precision
4113 (define_insn "xsiexpdp"
4114   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4115         (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4116                     (match_operand:DI 2 "register_operand" "r")]
4117          UNSPEC_VSX_SIEXPDP))]
4118   "TARGET_P9_VECTOR && TARGET_64BIT"
4119   "xsiexpdp %x0,%1,%2"
4120   [(set_attr "type" "fpsimple")])
4121
4122 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4123 (define_insn "xsiexpdpf"
4124   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4125         (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4126                     (match_operand:DI 2 "register_operand" "r")]
4127          UNSPEC_VSX_SIEXPDP))]
4128   "TARGET_P9_VECTOR && TARGET_64BIT"
4129   "xsiexpdp %x0,%1,%2"
4130   [(set_attr "type" "fpsimple")])
4131
4132 ;; VSX Scalar Compare Exponents Double-Precision
4133 (define_expand "xscmpexpdp_<code>"
4134   [(set (match_dup 3)
4135         (compare:CCFP
4136          (unspec:DF
4137           [(match_operand:DF 1 "vsx_register_operand" "wa")
4138            (match_operand:DF 2 "vsx_register_operand" "wa")]
4139           UNSPEC_VSX_SCMPEXPDP)
4140          (const_int 0)))
4141    (set (match_operand:SI 0 "register_operand" "=r")
4142         (CMP_TEST:SI (match_dup 3)
4143                      (const_int 0)))]
4144   "TARGET_P9_VECTOR"
4145 {
4146   operands[3] = gen_reg_rtx (CCFPmode);
4147 })
4148
4149 (define_insn "*xscmpexpdp"
4150   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4151         (compare:CCFP
4152          (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4153                      (match_operand:DF 2 "vsx_register_operand" "wa")]
4154           UNSPEC_VSX_SCMPEXPDP)
4155          (match_operand:SI 3 "zero_constant" "j")))]
4156   "TARGET_P9_VECTOR"
4157   "xscmpexpdp %0,%x1,%x2"
4158   [(set_attr "type" "fpcompare")])
4159
4160 ;; VSX Scalar Test Data Class Quad-Precision
4161 ;;  (Expansion for scalar_test_data_class (__ieee128, int))
4162 ;;   (Has side effect of setting the lt bit if operand 1 is negative,
4163 ;;    setting the eq bit if any of the conditions tested by operand 2
4164 ;;    are satisfied, and clearing the gt and undordered bits to zero.)
4165 (define_expand "xststdcqp"
4166   [(set (match_dup 3)
4167         (compare:CCFP
4168          (unspec:KF
4169           [(match_operand:KF 1 "altivec_register_operand" "v")
4170            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4171           UNSPEC_VSX_STSTDC)
4172          (const_int 0)))
4173    (set (match_operand:SI 0 "register_operand" "=r")
4174         (eq:SI (match_dup 3)
4175                (const_int 0)))]
4176   "TARGET_P9_VECTOR"
4177 {
4178   operands[3] = gen_reg_rtx (CCFPmode);
4179 })
4180
4181 ;; VSX Scalar Test Data Class Double- and Single-Precision
4182 ;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4183 ;;   if any of the conditions tested by operand 2 are satisfied.
4184 ;;   The gt and unordered bits are cleared to zero.)
4185 (define_expand "xststdc<Fvsx>"
4186   [(set (match_dup 3)
4187         (compare:CCFP
4188          (unspec:SFDF
4189           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4190            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4191           UNSPEC_VSX_STSTDC)
4192          (match_dup 4)))
4193    (set (match_operand:SI 0 "register_operand" "=r")
4194         (eq:SI (match_dup 3)
4195                (const_int 0)))]
4196   "TARGET_P9_VECTOR"
4197 {
4198   operands[3] = gen_reg_rtx (CCFPmode);
4199   operands[4] = CONST0_RTX (SImode);
4200 })
4201
4202 ;; The VSX Scalar Test Negative Quad-Precision
4203 (define_expand "xststdcnegqp"
4204   [(set (match_dup 2)
4205         (compare:CCFP
4206          (unspec:KF
4207           [(match_operand:KF 1 "altivec_register_operand" "v")
4208            (const_int 0)]
4209           UNSPEC_VSX_STSTDC)
4210          (const_int 0)))
4211    (set (match_operand:SI 0 "register_operand" "=r")
4212         (lt:SI (match_dup 2)
4213                (const_int 0)))]
4214   "TARGET_P9_VECTOR"
4215 {
4216   operands[2] = gen_reg_rtx (CCFPmode);
4217 })
4218
4219 ;; The VSX Scalar Test Negative Double- and Single-Precision
4220 (define_expand "xststdcneg<Fvsx>"
4221   [(set (match_dup 2)
4222         (compare:CCFP
4223          (unspec:SFDF
4224           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4225            (const_int 0)]
4226           UNSPEC_VSX_STSTDC)
4227          (match_dup 3)))
4228    (set (match_operand:SI 0 "register_operand" "=r")
4229         (lt:SI (match_dup 2)
4230                (const_int 0)))]
4231   "TARGET_P9_VECTOR"
4232 {
4233   operands[2] = gen_reg_rtx (CCFPmode);
4234   operands[3] = CONST0_RTX (SImode);
4235 })
4236
4237 (define_insn "*xststdcqp"
4238   [(set (match_operand:CCFP 0 "" "=y")
4239         (compare:CCFP
4240          (unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
4241                      (match_operand:SI 2 "u7bit_cint_operand" "n")]
4242           UNSPEC_VSX_STSTDC)
4243          (const_int 0)))]
4244   "TARGET_P9_VECTOR"
4245   "xststdcqp %0,%1,%2"
4246   [(set_attr "type" "fpcompare")])
4247
4248 (define_insn "*xststdc<Fvsx>"
4249   [(set (match_operand:CCFP 0 "" "=y")
4250         (compare:CCFP
4251          (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4252                        (match_operand:SI 2 "u7bit_cint_operand" "n")]
4253           UNSPEC_VSX_STSTDC)
4254          (match_operand:SI 3 "zero_constant" "j")))]
4255   "TARGET_P9_VECTOR"
4256   "xststdc<Fvsx> %0,%x1,%2"
4257   [(set_attr "type" "fpcompare")])
4258
4259 ;; VSX Vector Extract Exponent Double and Single Precision
4260 (define_insn "xvxexp<VSs>"
4261   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4262         (unspec:VSX_F
4263          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4264          UNSPEC_VSX_VXEXP))]
4265   "TARGET_P9_VECTOR"
4266   "xvxexp<VSs> %x0,%x1"
4267   [(set_attr "type" "vecsimple")])
4268
4269 ;; VSX Vector Extract Significand Double and Single Precision
4270 (define_insn "xvxsig<VSs>"
4271   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4272         (unspec:VSX_F
4273          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4274          UNSPEC_VSX_VXSIG))]
4275   "TARGET_P9_VECTOR"
4276   "xvxsig<VSs> %x0,%x1"
4277   [(set_attr "type" "vecsimple")])
4278
4279 ;; VSX Vector Insert Exponent Double and Single Precision
4280 (define_insn "xviexp<VSs>"
4281   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4282         (unspec:VSX_F
4283          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4284           (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4285          UNSPEC_VSX_VIEXP))]
4286   "TARGET_P9_VECTOR"
4287   "xviexp<VSs> %x0,%x1,%x2"
4288   [(set_attr "type" "vecsimple")])
4289
4290 ;; VSX Vector Test Data Class Double and Single Precision
4291 ;; The corresponding elements of the result vector are all ones
4292 ;; if any of the conditions tested by operand 3 are satisfied.
4293 (define_insn "xvtstdc<VSs>"
4294   [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4295         (unspec:<VSI>
4296          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4297           (match_operand:SI 2 "u7bit_cint_operand" "n")]
4298          UNSPEC_VSX_VTSTDC))]
4299   "TARGET_P9_VECTOR"
4300   "xvtstdc<VSs> %x0,%x1,%2"
4301   [(set_attr "type" "vecsimple")])
4302
4303 ;; ISA 3.0 String Operations Support
4304
4305 ;; Compare vectors producing a vector result and a predicate, setting CR6
4306 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4307 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
4308 ;; need to match v4sf, v2df, or v2di modes because those are expanded
4309 ;; to use Power8 instructions.
4310 (define_insn "*vsx_ne_<mode>_p"
4311   [(set (reg:CC CR6_REGNO)
4312         (unspec:CC
4313          [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4314                  (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4315          UNSPEC_PREDICATE))
4316    (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4317         (ne:VSX_EXTRACT_I (match_dup 1)
4318                           (match_dup 2)))]
4319   "TARGET_P9_VECTOR"
4320   "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4321   [(set_attr "type" "vecsimple")])
4322
4323 (define_insn "*vector_nez_<mode>_p"
4324   [(set (reg:CC CR6_REGNO)
4325         (unspec:CC [(unspec:VI
4326                      [(match_operand:VI 1 "gpc_reg_operand" "v")
4327                       (match_operand:VI 2 "gpc_reg_operand" "v")]
4328                      UNSPEC_NEZ_P)]
4329          UNSPEC_PREDICATE))
4330    (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4331         (unspec:VI [(match_dup 1)
4332                     (match_dup 2)]
4333          UNSPEC_NEZ_P))]
4334   "TARGET_P9_VECTOR"
4335   "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4336   [(set_attr "type" "vecsimple")])
4337
4338 ;; Load VSX Vector with Length
4339 (define_expand "lxvl"
4340   [(set (match_dup 3)
4341         (match_operand:DI 2 "register_operand"))
4342    (set (match_operand:V16QI 0 "vsx_register_operand")
4343         (unspec:V16QI
4344          [(match_operand:DI 1 "gpc_reg_operand")
4345           (match_dup 3)]
4346          UNSPEC_LXVL))]
4347   "TARGET_P9_VECTOR && TARGET_64BIT"
4348 {
4349   operands[3] = gen_reg_rtx (DImode);
4350 })
4351
4352 (define_insn "*lxvl"
4353   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4354         (unspec:V16QI
4355          [(match_operand:DI 1 "gpc_reg_operand" "b")
4356           (match_operand:DI 2 "register_operand" "+r")]
4357          UNSPEC_LXVL))]
4358   "TARGET_P9_VECTOR && TARGET_64BIT"
4359   "sldi %2,%2, 56\; lxvl %x0,%1,%2"
4360   [(set_attr "length" "8")
4361    (set_attr "type" "vecload")])
4362
4363 (define_insn "lxvll"
4364   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4365         (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4366                        (match_operand:DI 2 "register_operand" "r")]
4367                       UNSPEC_LXVLL))]
4368   "TARGET_P9_VECTOR"
4369   "lxvll %x0,%1,%2"
4370   [(set_attr "type" "vecload")])
4371
4372 ;; Expand for builtin xl_len_r
4373 (define_expand "xl_len_r"
4374   [(match_operand:V16QI 0 "vsx_register_operand")
4375    (match_operand:DI 1 "register_operand")
4376    (match_operand:DI 2 "register_operand")]
4377   ""
4378 {
4379   rtx shift_mask = gen_reg_rtx (V16QImode);
4380   rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4381   rtx tmp = gen_reg_rtx (DImode);
4382
4383   emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4384   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4385   emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4386   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4387              shift_mask));
4388   DONE;
4389 })
4390
4391 (define_insn "stxvll"
4392   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4393         (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4394                        (match_operand:DI 2 "register_operand" "r")]
4395                       UNSPEC_STXVLL))]
4396   "TARGET_P9_VECTOR"
4397   "stxvll %x0,%1,%2"
4398   [(set_attr "type" "vecstore")])
4399
4400 ;; Store VSX Vector with Length
4401 (define_expand "stxvl"
4402   [(set (match_dup 3)
4403         (match_operand:DI 2 "register_operand"))
4404    (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
4405         (unspec:V16QI
4406          [(match_operand:V16QI 0 "vsx_register_operand")
4407           (match_dup 3)]
4408          UNSPEC_STXVL))]
4409   "TARGET_P9_VECTOR && TARGET_64BIT"
4410 {
4411   operands[3] = gen_reg_rtx (DImode);
4412 })
4413
4414 (define_insn "*stxvl"
4415   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4416         (unspec:V16QI
4417          [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4418           (match_operand:DI 2 "register_operand" "+r")]
4419          UNSPEC_STXVL))]
4420   "TARGET_P9_VECTOR && TARGET_64BIT"
4421   "sldi %2,%2,56\;stxvl %x0,%1,%2"
4422   [(set_attr "length" "8")
4423    (set_attr "type" "vecstore")])
4424
4425 ;; Expand for builtin xst_len_r
4426 (define_expand "xst_len_r"
4427   [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
4428    (match_operand:DI 1 "register_operand" "b")
4429    (match_operand:DI 2 "register_operand" "r")]
4430   "UNSPEC_XST_LEN_R"
4431 {
4432   rtx shift_mask = gen_reg_rtx (V16QImode);
4433   rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4434   rtx tmp = gen_reg_rtx (DImode);
4435
4436   emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
4437   emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
4438              shift_mask));
4439   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4440   emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
4441   DONE;
4442 })
4443
4444 ;; Vector Compare Not Equal Byte
4445 (define_insn "vcmpneb"
4446   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
4447         (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
4448                        (match_operand:V16QI 2 "altivec_register_operand" "v")]
4449          UNSPEC_VCMPNEB))]
4450   "TARGET_P9_VECTOR"
4451   "vcmpneb %0,%1,%2"
4452   [(set_attr "type" "vecsimple")])
4453
4454 ;; Vector Compare Not Equal or Zero Byte
4455 (define_insn "vcmpnezb"
4456   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
4457         (unspec:V16QI
4458          [(match_operand:V16QI 1 "altivec_register_operand" "v")
4459           (match_operand:V16QI 2 "altivec_register_operand" "v")]
4460          UNSPEC_VCMPNEZB))]
4461   "TARGET_P9_VECTOR"
4462   "vcmpnezb %0,%1,%2"
4463   [(set_attr "type" "vecsimple")])
4464
4465 ;; Vector Compare Not Equal Half Word
4466 (define_insn "vcmpneh"
4467   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
4468         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
4469                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
4470          UNSPEC_VCMPNEH))]
4471   "TARGET_P9_VECTOR"
4472   "vcmpneh %0,%1,%2"
4473   [(set_attr "type" "vecsimple")])
4474
4475 ;; Vector Compare Not Equal or Zero Half Word
4476 (define_insn "vcmpnezh"
4477   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
4478         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
4479                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
4480          UNSPEC_VCMPNEZH))]
4481   "TARGET_P9_VECTOR"
4482   "vcmpnezh %0,%1,%2"
4483   [(set_attr "type" "vecsimple")])
4484
4485 ;; Vector Compare Not Equal Word
4486 (define_insn "vcmpnew"
4487   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
4488         (unspec:V4SI
4489          [(match_operand:V4SI 1 "altivec_register_operand" "v")
4490           (match_operand:V4SI 2 "altivec_register_operand" "v")]
4491          UNSPEC_VCMPNEH))]
4492   "TARGET_P9_VECTOR"
4493   "vcmpnew %0,%1,%2"
4494   [(set_attr "type" "vecsimple")])
4495
4496 ;; Vector Compare Not Equal or Zero Word
4497 (define_insn "vcmpnezw"
4498   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
4499         (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
4500                       (match_operand:V4SI 2 "altivec_register_operand" "v")]
4501          UNSPEC_VCMPNEZW))]
4502   "TARGET_P9_VECTOR"
4503   "vcmpnezw %0,%1,%2"
4504   [(set_attr "type" "vecsimple")])
4505
4506 ;; Vector Count Leading Zero Least-Significant Bits Byte
4507 (define_insn "vclzlsbb"
4508   [(set (match_operand:SI 0 "register_operand" "=r")
4509         (unspec:SI
4510          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
4511          UNSPEC_VCLZLSBB))]
4512   "TARGET_P9_VECTOR"
4513   "vclzlsbb %0,%1"
4514   [(set_attr "type" "vecsimple")])
4515
4516 ;; Vector Count Trailing Zero Least-Significant Bits Byte
4517 (define_insn "vctzlsbb"
4518   [(set (match_operand:SI 0 "register_operand" "=r")
4519         (unspec:SI
4520          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
4521          UNSPEC_VCTZLSBB))]
4522   "TARGET_P9_VECTOR"
4523   "vctzlsbb %0,%1"
4524   [(set_attr "type" "vecsimple")])
4525
4526 ;; Vector Extract Unsigned Byte Left-Indexed
4527 (define_insn "vextublx"
4528   [(set (match_operand:SI 0 "register_operand" "=r")
4529         (unspec:SI
4530          [(match_operand:SI 1 "register_operand" "r")
4531           (match_operand:V16QI 2 "altivec_register_operand" "v")]
4532          UNSPEC_VEXTUBLX))]
4533   "TARGET_P9_VECTOR"
4534   "vextublx %0,%1,%2"
4535   [(set_attr "type" "vecsimple")])
4536
4537 ;; Vector Extract Unsigned Byte Right-Indexed
4538 (define_insn "vextubrx"
4539   [(set (match_operand:SI 0 "register_operand" "=r")
4540         (unspec:SI
4541          [(match_operand:SI 1 "register_operand" "r")
4542           (match_operand:V16QI 2 "altivec_register_operand" "v")]
4543          UNSPEC_VEXTUBRX))]
4544   "TARGET_P9_VECTOR"
4545   "vextubrx %0,%1,%2"
4546   [(set_attr "type" "vecsimple")])
4547
4548 ;; Vector Extract Unsigned Half Word Left-Indexed
4549 (define_insn "vextuhlx"
4550   [(set (match_operand:SI 0 "register_operand" "=r")
4551         (unspec:SI
4552          [(match_operand:SI 1 "register_operand" "r")
4553           (match_operand:V8HI 2 "altivec_register_operand" "v")]
4554          UNSPEC_VEXTUHLX))]
4555   "TARGET_P9_VECTOR"
4556   "vextuhlx %0,%1,%2"
4557   [(set_attr "type" "vecsimple")])
4558
4559 ;; Vector Extract Unsigned Half Word Right-Indexed
4560 (define_insn "vextuhrx"
4561   [(set (match_operand:SI 0 "register_operand" "=r")
4562         (unspec:SI
4563          [(match_operand:SI 1 "register_operand" "r")
4564           (match_operand:V8HI 2 "altivec_register_operand" "v")]
4565          UNSPEC_VEXTUHRX))]
4566   "TARGET_P9_VECTOR"
4567   "vextuhrx %0,%1,%2"
4568   [(set_attr "type" "vecsimple")])
4569
4570 ;; Vector Extract Unsigned Word Left-Indexed
4571 (define_insn "vextuwlx"
4572   [(set (match_operand:SI 0 "register_operand" "=r")
4573         (unspec:SI
4574          [(match_operand:SI 1 "register_operand" "r")
4575           (match_operand:V4SI 2 "altivec_register_operand" "v")]
4576          UNSPEC_VEXTUWLX))]
4577   "TARGET_P9_VECTOR"
4578   "vextuwlx %0,%1,%2"
4579   [(set_attr "type" "vecsimple")])
4580
4581 ;; Vector Extract Unsigned Word Right-Indexed
4582 (define_insn "vextuwrx"
4583   [(set (match_operand:SI 0 "register_operand" "=r")
4584         (unspec:SI
4585          [(match_operand:SI 1 "register_operand" "r")
4586           (match_operand:V4SI 2 "altivec_register_operand" "v")]
4587          UNSPEC_VEXTUWRX))]
4588   "TARGET_P9_VECTOR"
4589   "vextuwrx %0,%1,%2"
4590   [(set_attr "type" "vecsimple")])
4591
4592 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
4593 ;; endian version needs to adjust the byte number, and the V4SI element in
4594 ;; vinsert4b.
4595 (define_expand "vextract4b"
4596   [(set (match_operand:DI 0 "gpc_reg_operand")
4597         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
4598                     (match_operand:QI 2 "const_0_to_12_operand")]
4599                    UNSPEC_XXEXTRACTUW))]
4600   "TARGET_P9_VECTOR"
4601 {
4602   if (!VECTOR_ELT_ORDER_BIG)
4603     operands[2] = GEN_INT (12 - INTVAL (operands[2]));
4604 })
4605
4606 (define_insn_and_split "*vextract4b_internal"
4607   [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r")
4608         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v")
4609                     (match_operand:QI 2 "const_0_to_12_operand" "n,n")]
4610                    UNSPEC_XXEXTRACTUW))]
4611   "TARGET_P9_VECTOR"
4612   "@
4613    xxextractuw %x0,%x1,%2
4614    #"
4615   "&& reload_completed && int_reg_operand (operands[0], DImode)"
4616   [(const_int 0)]
4617 {
4618   rtx op0 = operands[0];
4619   rtx op1 = operands[1];
4620   rtx op2 = operands[2];
4621   rtx op0_si = gen_rtx_REG (SImode, REGNO (op0));
4622   rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1));
4623
4624   emit_move_insn (op0, op2);
4625   if (VECTOR_ELT_ORDER_BIG)
4626     emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si));
4627   else
4628     emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si));
4629   DONE;
4630 }
4631   [(set_attr "type" "vecperm")])
4632
4633 (define_expand "vinsert4b"
4634   [(set (match_operand:V16QI 0 "vsx_register_operand")
4635         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
4636                        (match_operand:V16QI 2 "vsx_register_operand")
4637                        (match_operand:QI 3 "const_0_to_12_operand")]
4638                    UNSPEC_XXINSERTW))]
4639   "TARGET_P9_VECTOR"
4640 {
4641   if (!VECTOR_ELT_ORDER_BIG)
4642     {
4643       rtx op1 = operands[1];
4644       rtx v4si_tmp = gen_reg_rtx (V4SImode);
4645       emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
4646       operands[1] = v4si_tmp;
4647       operands[3] = GEN_INT (12 - INTVAL (operands[3]));
4648     }
4649 })
4650
4651 (define_insn "*vinsert4b_internal"
4652   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4653         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
4654                        (match_operand:V16QI 2 "vsx_register_operand" "0")
4655                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
4656                    UNSPEC_XXINSERTW))]
4657   "TARGET_P9_VECTOR"
4658   "xxinsertw %x0,%x1,%3"
4659   [(set_attr "type" "vecperm")])
4660
4661 (define_expand "vinsert4b_di"
4662   [(set (match_operand:V16QI 0 "vsx_register_operand")
4663         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand")
4664                        (match_operand:V16QI 2 "vsx_register_operand")
4665                        (match_operand:QI 3 "const_0_to_12_operand")]
4666                    UNSPEC_XXINSERTW))]
4667   "TARGET_P9_VECTOR"
4668 {
4669   if (!VECTOR_ELT_ORDER_BIG)
4670     operands[3] = GEN_INT (12 - INTVAL (operands[3]));
4671 })
4672
4673 (define_insn "*vinsert4b_di_internal"
4674   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4675         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj")
4676                        (match_operand:V16QI 2 "vsx_register_operand" "0")
4677                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
4678                    UNSPEC_XXINSERTW))]
4679   "TARGET_P9_VECTOR"
4680   "xxinsertw %x0,%x1,%3"
4681   [(set_attr "type" "vecperm")])
4682
4683 ;; Generate vector extract four float 32 values from left four elements
4684 ;; of eight element vector of float 16 values.
4685 (define_expand "vextract_fp_from_shorth"
4686   [(set (match_operand:V4SF 0 "register_operand" "=wa")
4687         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
4688    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
4689   "TARGET_P9_VECTOR"
4690 {
4691   int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
4692   int i;
4693
4694   rtx rvals[16];
4695   rtx mask = gen_reg_rtx (V16QImode);
4696   rtx tmp = gen_reg_rtx (V16QImode);
4697   rtvec v;
4698
4699   for (i = 0; i < 16; i++)
4700     rvals[i] = GEN_INT (vals[i]);
4701
4702   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
4703      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
4704      src half words 0,1,2,3 for the conversion instruction.  */
4705   v = gen_rtvec_v (16, rvals);
4706   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
4707   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
4708                                           operands[1], mask));
4709   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
4710   DONE;
4711 })
4712
4713 ;; Generate vector extract four float 32 values from right four elements
4714 ;; of eight element vector of float 16 values.
4715 (define_expand "vextract_fp_from_shortl"
4716   [(set (match_operand:V4SF 0 "register_operand" "=wa")
4717         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
4718         UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
4719   "TARGET_P9_VECTOR"
4720 {
4721   int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
4722   int i;
4723   rtx rvals[16];
4724   rtx mask = gen_reg_rtx (V16QImode);
4725   rtx tmp = gen_reg_rtx (V16QImode);
4726   rtvec v;
4727
4728   for (i = 0; i < 16; i++)
4729     rvals[i] = GEN_INT (vals[i]);
4730
4731   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
4732      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
4733      src half words 4,5,6,7 for the conversion instruction.  */
4734   v = gen_rtvec_v (16, rvals);
4735   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
4736   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
4737                                           operands[1], mask));
4738   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
4739   DONE;
4740 })
4741
4742 ;; Support for ISA 3.0 vector byte reverse
4743
4744 ;; Swap all bytes with in a vector
4745 (define_insn "p9_xxbrq_v1ti"
4746   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
4747         (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
4748   "TARGET_P9_VECTOR"
4749   "xxbrq %x0,%x1"
4750   [(set_attr "type" "vecperm")])
4751
4752 (define_expand "p9_xxbrq_v16qi"
4753   [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
4754    (use (match_operand:V16QI 1 "vsx_register_operand" "=wa"))]
4755   "TARGET_P9_VECTOR"
4756 {
4757   rtx op0 = gen_lowpart (V1TImode, operands[0]);
4758   rtx op1 = gen_lowpart (V1TImode, operands[1]);
4759   emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
4760   DONE;
4761 })
4762
4763 ;; Swap all bytes in each 64-bit element
4764 (define_insn "p9_xxbrd_<mode>"
4765   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4766         (bswap:VSX_D (match_operand:VSX_D 1 "vsx_register_operand" "wa")))]
4767   "TARGET_P9_VECTOR"
4768   "xxbrd %x0,%x1"
4769   [(set_attr "type" "vecperm")])
4770
4771 ;; Swap all bytes in each 32-bit element
4772 (define_insn "p9_xxbrw_<mode>"
4773   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4774         (bswap:VSX_W (match_operand:VSX_W 1 "vsx_register_operand" "wa")))]
4775   "TARGET_P9_VECTOR"
4776   "xxbrw %x0,%x1"
4777   [(set_attr "type" "vecperm")])
4778
4779 ;; Swap all bytes in each 16-bit element
4780 (define_insn "p9_xxbrh_v8hi"
4781   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
4782         (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
4783   "TARGET_P9_VECTOR"
4784   "xxbrh %x0,%x1"
4785   [(set_attr "type" "vecperm")])
4786 \f
4787
4788 ;; Operand numbers for the following peephole2
4789 (define_constants
4790   [(SFBOOL_TMP_GPR               0)             ;; GPR temporary
4791    (SFBOOL_TMP_VSX               1)             ;; vector temporary
4792    (SFBOOL_MFVSR_D               2)             ;; move to gpr dest
4793    (SFBOOL_MFVSR_A               3)             ;; move to gpr src
4794    (SFBOOL_BOOL_D                4)             ;; and/ior/xor dest
4795    (SFBOOL_BOOL_A1               5)             ;; and/ior/xor arg1
4796    (SFBOOL_BOOL_A2               6)             ;; and/ior/xor arg1
4797    (SFBOOL_SHL_D                 7)             ;; shift left dest
4798    (SFBOOL_SHL_A                 8)             ;; shift left arg
4799    (SFBOOL_MTVSR_D               9)             ;; move to vecter dest
4800    (SFBOOL_MFVSR_A_V4SF         10)             ;; SFBOOL_MFVSR_A as V4SFmode
4801    (SFBOOL_BOOL_A_DI            11)             ;; SFBOOL_BOOL_A1/A2 as DImode
4802    (SFBOOL_TMP_VSX_DI           12)             ;; SFBOOL_TMP_VSX as DImode
4803    (SFBOOL_MTVSR_D_V4SF         13)])           ;; SFBOOL_MTVSRD_D as V4SFmode
4804
4805 ;; Attempt to optimize some common GLIBC operations using logical operations to
4806 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
4807 ;; after macro expansion that looks like:
4808 ;;
4809 ;;      typedef union {
4810 ;;        float value;
4811 ;;        uint32_t word;
4812 ;;      } ieee_float_shape_type;
4813 ;;
4814 ;;      float t1;
4815 ;;      int32_t is;
4816 ;;
4817 ;;      do {
4818 ;;        ieee_float_shape_type gf_u;
4819 ;;        gf_u.value = (t1);
4820 ;;        (is) = gf_u.word;
4821 ;;      } while (0);
4822 ;;
4823 ;;      do {
4824 ;;        ieee_float_shape_type sf_u;
4825 ;;        sf_u.word = (is & 0xfffff000);
4826 ;;        (t1) = sf_u.value;
4827 ;;      } while (0);
4828 ;;
4829 ;;
4830 ;; This would result in two direct move operations (convert to memory format,
4831 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
4832 ;; scalar format).  With this peephole, we eliminate the direct move to the
4833 ;; GPR, and instead move the integer mask value to the vector register after a
4834 ;; shift and do the VSX logical operation.
4835
4836 ;; The insns for dealing with SFmode in GPR registers looks like:
4837 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
4838 ;;
4839 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
4840 ;;
4841 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
4842 ;;
4843 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
4844 ;;
4845 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
4846 ;;
4847 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
4848
4849 (define_peephole2
4850   [(match_scratch:DI SFBOOL_TMP_GPR "r")
4851    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
4852
4853    ;; MFVSRWZ (aka zero_extend)
4854    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
4855         (zero_extend:DI
4856          (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
4857
4858    ;; AND/IOR/XOR operation on int
4859    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
4860         (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
4861                         (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
4862
4863    ;; SLDI
4864    (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
4865         (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
4866                    (const_int 32)))
4867
4868    ;; MTVSRD
4869    (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
4870         (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
4871
4872   "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
4873    /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
4874       to compare registers, when the mode is different.  */
4875    && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
4876    && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
4877    && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
4878    && (REG_P (operands[SFBOOL_BOOL_A2])
4879        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
4880    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
4881        || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
4882    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
4883        || (REG_P (operands[SFBOOL_BOOL_A2])
4884            && REGNO (operands[SFBOOL_MFVSR_D])
4885                 == REGNO (operands[SFBOOL_BOOL_A2])))
4886    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
4887    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
4888        || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
4889    && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
4890   [(set (match_dup SFBOOL_TMP_GPR)
4891         (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
4892                    (const_int 32)))
4893
4894    (set (match_dup SFBOOL_TMP_VSX_DI)
4895         (match_dup SFBOOL_TMP_GPR))
4896
4897    (set (match_dup SFBOOL_MTVSR_D_V4SF)
4898         (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
4899                           (match_dup SFBOOL_TMP_VSX)))]
4900 {
4901   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
4902   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
4903   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
4904   int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
4905   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
4906   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
4907
4908   if (CONST_INT_P (bool_a2))
4909     {
4910       rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
4911       emit_move_insn (tmp_gpr, bool_a2);
4912       operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
4913     }
4914   else
4915     {
4916       int regno_bool_a1 = REGNO (bool_a1);
4917       int regno_bool_a2 = REGNO (bool_a2);
4918       int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
4919                           ? regno_bool_a2 : regno_bool_a1);
4920       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
4921     }
4922
4923   operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
4924   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
4925   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
4926 })