;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])
;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])
;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")

(define_mode_attr VSX_XXBR [(V8HI "h")
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")

(define_mode_attr VSI [(V4SF "V4SI")

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
;; Map into either s or v, depending on whether this is a scalar or vector
;; instruction.
(define_mode_attr VSv [(V16QI "v")
;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI "TI")

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])
;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Iterator for the move to mask instructions
(define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
(define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
;; Constants for creating unspecs
(define_c_enum "unspec"
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SPN
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX

(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SPN])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])

;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
;; Vector replace_elt iterator/attrs for 32-bit and 64-bit elements
(define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
                                    (V2DI "d") (V2DF "d")])
(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
                                  (V2DI "3") (V2DF "3")])
(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
                                   (V2DI "8") (V2DF "8")])
;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
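
;; As an illustration (a sketch, not taken from GCC's own sources or test
;; suite): with the AltiVec/VSX extensions from <altivec.h>, a simple load
;;
;;   vector double load_v2df (vector double *p) { return *p; }
;;
;; compiled for little endian without ISA 3.0 would typically become an
;; element-reversing load plus a doubleword swap, roughly:
;;
;;   lxvd2x 34,0,3
;;   xxpermdi 34,34,34,2
;;
;; whereas the split above rewrites a 128-bit-aligned access into a plain
;; lvx when the destination is (or can be) an Altivec register, avoiding
;; the swap entirely.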
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
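
;; To see why the re-permute is needed, consider (as a hypothetical
;; post-reload sequence, not lifted from real compiler output) storing
;; vs0 while vs0 is still live afterwards:
;;
;;   xxpermdi 0,0,0,2    # swap the doublewords in place
;;   stxvd2x 0,0,9       # element-reversing store yields memory order
;;   xxpermdi 0,0,0,2    # restore the original value for later uses
;;
;; Without the third instruction the source register would be left in
;; swapped order after the store.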
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
          (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   xxpermdi %x0,%x1,%x1,2
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 0) (match_dup 1))]
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
      emit_note (NOTE_INSN_DELETED);
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])
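
;; For example (an assumed peephole2 scenario, not taken from the sources),
;; a TImode copy routed through registers can leave a pair of rotates:
;;
;;   (set (reg:TI 0) (rotate:TI (reg:TI 1) (const_int 64)))
;;   (set (reg:TI 2) (rotate:TI (reg:TI 0) (const_int 64)))
;;
;; Rotating a 128-bit value by 64 twice is the identity, so when the
;; intermediate register dies (or is the same register) the pair collapses
;; to the single move that the peepholes above substitute.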
;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])
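
;; For instance (illustrative only, not from the sources), with the
;; vec_splats intrinsic from <altivec.h>:
;;
;;   vector signed char splat_m5 (void)
;;   {
;;     return vec_splats ((signed char) -5);
;;   }
;;
;; can be generated on ISA 3.0 as a single instruction, with the immediate
;; masked to 8 bits as above (-5 & 0xff == 251):
;;
;;   xxspltib 34,251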
(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  rtx op1 = operands[1];

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])
(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
1158 (define_insn "vsx_mov<mode>_64bit"
1159 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1160 "=ZwO, wa, wa, r, we, ?wQ,
1161 ?&r, ??r, ??Y, <??r>, wa, v,
1162 ?wa, v, <??r>, wZ, v")
1164 (match_operand:VSX_M 1 "input_operand"
1165 "wa, ZwO, wa, we, r, r,
1166 wQ, Y, r, r, wE, jwM,
1167 ?jwM, W, <nW>, v, wZ"))]
1169 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1170 && (register_operand (operands[0], <MODE>mode)
1171 || register_operand (operands[1], <MODE>mode))"
1173 return rs6000_output_move_128bit (operands);
1176 "vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
1177 store, load, store, *, vecsimple, vecsimple,
1178 vecsimple, *, *, vecstore, vecload")
1179 (set_attr "num_insns"
1183 (set_attr "max_prefixed_insns"
1192 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1194 <VSisa>, *, *, *, *")])
;;              VSX store  VSX load   VSX move  GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1  VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       ??r,       ??Y,       <??r>,
                wa,        v,         ?wa,      v,         <??r>,
        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       Y,         r,         r,
                wE,        jwM,       ?jwM,     W,         <nW>,
  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
               "vecstore,  vecload,   vecsimple, load,     store,     *,
                vecsimple, vecsimple, vecsimple, *,        *,
               "*,         *,         *,         16,       16,        16,
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
                p9v,       *,         <VSisa>,   *,        *,
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
  [(set_attr "type" "vecload")])
1278 (define_insn "vsx_ld_elemrev_v2df"
1279 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1281 (match_operand:V2DF 1 "memory_operand" "Z")
1282 (parallel [(const_int 1) (const_int 0)])))]
1283 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1285 [(set_attr "type" "vecload")])
1287 (define_insn "vsx_ld_elemrev_v4si"
1288 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1290 (match_operand:V4SI 1 "memory_operand" "Z")
1291 (parallel [(const_int 3) (const_int 2)
1292 (const_int 1) (const_int 0)])))]
1293 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1295 [(set_attr "type" "vecload")])
1297 (define_insn "vsx_ld_elemrev_v4sf"
1298 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1300 (match_operand:V4SF 1 "memory_operand" "Z")
1301 (parallel [(const_int 3) (const_int 2)
1302 (const_int 1) (const_int 0)])))]
1303 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1305 [(set_attr "type" "vecload")])
1307 (define_expand "vsx_ld_elemrev_v8hi"
1308 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1310 (match_operand:V8HI 1 "memory_operand" "Z")
1311 (parallel [(const_int 7) (const_int 6)
1312 (const_int 5) (const_int 4)
1313 (const_int 3) (const_int 2)
1314 (const_int 1) (const_int 0)])))]
1315 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1317 if (!TARGET_P9_VECTOR)
1319 rtx tmp = gen_reg_rtx (V4SImode);
1320 rtx subreg, subreg2, perm[16], pcv;
1321 /* 2 is leftmost element in register */
1322 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1325 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1326 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1327 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1329 for (i = 0; i < 16; ++i)
1330 perm[i] = GEN_INT (reorder[i]);
1332 pcv = force_reg (V16QImode,
1333 gen_rtx_CONST_VECTOR (V16QImode,
1334 gen_rtvec_v (16, perm)));
1335 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
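
;; To unpack the reorder table above (informally): the V4SI element-reversing
;; load already places the four 32-bit words correctly, but within each word
;; the two 16-bit halves are still in the wrong order for a V8HI view.
;; Byte-index pairs such as {13,12} and {15,14} in the permute control vector
;; select the bytes of adjacent halfwords in exchanged positions, so the
;; vperm performs that intra-word halfword swap.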
1341 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1342 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1344 (match_operand:V8HI 1 "memory_operand" "Z")
1345 (parallel [(const_int 7) (const_int 6)
1346 (const_int 5) (const_int 4)
1347 (const_int 3) (const_int 2)
1348 (const_int 1) (const_int 0)])))]
1349 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1351 [(set_attr "type" "vecload")])
1353 (define_expand "vsx_ld_elemrev_v16qi"
1354 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1356 (match_operand:V16QI 1 "memory_operand" "Z")
1357 (parallel [(const_int 15) (const_int 14)
1358 (const_int 13) (const_int 12)
1359 (const_int 11) (const_int 10)
1360 (const_int 9) (const_int 8)
1361 (const_int 7) (const_int 6)
1362 (const_int 5) (const_int 4)
1363 (const_int 3) (const_int 2)
1364 (const_int 1) (const_int 0)])))]
1365 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1367 if (!TARGET_P9_VECTOR)
1369 rtx tmp = gen_reg_rtx (V4SImode);
1370 rtx subreg, subreg2, perm[16], pcv;
1371 /* 3 is leftmost element in register */
1372 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1375 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1376 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1377 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1379 for (i = 0; i < 16; ++i)
1380 perm[i] = GEN_INT (reorder[i]);
1382 pcv = force_reg (V16QImode,
1383 gen_rtx_CONST_VECTOR (V16QImode,
1384 gen_rtvec_v (16, perm)));
1385 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1391 (define_insn "vsx_ld_elemrev_v16qi_internal"
1392 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1394 (match_operand:V16QI 1 "memory_operand" "Z")
1395 (parallel [(const_int 15) (const_int 14)
1396 (const_int 13) (const_int 12)
1397 (const_int 11) (const_int 10)
1398 (const_int 9) (const_int 8)
1399 (const_int 7) (const_int 6)
1400 (const_int 5) (const_int 4)
1401 (const_int 3) (const_int 2)
1402 (const_int 1) (const_int 0)])))]
1403 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1405 [(set_attr "type" "vecload")])
1407 (define_insn "vsx_st_elemrev_v1ti"
1408 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1410 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1411 (parallel [(const_int 0)])))
1412 (clobber (match_dup 1))]
1413 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1415 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1417 [(set_attr "type" "vecstore")])
1419 (define_insn "vsx_st_elemrev_v2df"
1420 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1422 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1423 (parallel [(const_int 1) (const_int 0)])))]
1424 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1426 [(set_attr "type" "vecstore")])
1428 (define_insn "vsx_st_elemrev_v2di"
1429 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1431 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1432 (parallel [(const_int 1) (const_int 0)])))]
1433 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1435 [(set_attr "type" "vecstore")])
1437 (define_insn "vsx_st_elemrev_v4sf"
1438 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1440 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1441 (parallel [(const_int 3) (const_int 2)
1442 (const_int 1) (const_int 0)])))]
1443 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1445 [(set_attr "type" "vecstore")])
1447 (define_insn "vsx_st_elemrev_v4si"
1448 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1450 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1451 (parallel [(const_int 3) (const_int 2)
1452 (const_int 1) (const_int 0)])))]
1453 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1455 [(set_attr "type" "vecstore")])
1457 (define_expand "vsx_st_elemrev_v8hi"
1458 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1460 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1461 (parallel [(const_int 7) (const_int 6)
1462 (const_int 5) (const_int 4)
1463 (const_int 3) (const_int 2)
1464 (const_int 1) (const_int 0)])))]
1465 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1467 if (!TARGET_P9_VECTOR)
1469 rtx mem_subreg, subreg, perm[16], pcv;
1470 rtx tmp = gen_reg_rtx (V8HImode);
1471 /* 2 is leftmost element in register */
1472 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1475 for (i = 0; i < 16; ++i)
1476 perm[i] = GEN_INT (reorder[i]);
1478 pcv = force_reg (V16QImode,
1479 gen_rtx_CONST_VECTOR (V16QImode,
1480 gen_rtvec_v (16, perm)));
1481 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1483 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1484 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1485 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1490 (define_insn "*vsx_st_elemrev_v2di_internal"
1491 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1493 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1494 (parallel [(const_int 1) (const_int 0)])))]
1495 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1497 [(set_attr "type" "vecstore")])
1499 (define_insn "*vsx_st_elemrev_v8hi_internal"
1500 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1502 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1503 (parallel [(const_int 7) (const_int 6)
1504 (const_int 5) (const_int 4)
1505 (const_int 3) (const_int 2)
1506 (const_int 1) (const_int 0)])))]
1507 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1509 [(set_attr "type" "vecstore")])
1511 (define_expand "vsx_st_elemrev_v16qi"
1512 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1514 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1515 (parallel [(const_int 15) (const_int 14)
1516 (const_int 13) (const_int 12)
1517 (const_int 11) (const_int 10)
1518 (const_int 9) (const_int 8)
1519 (const_int 7) (const_int 6)
1520 (const_int 5) (const_int 4)
1521 (const_int 3) (const_int 2)
1522 (const_int 1) (const_int 0)])))]
1523 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1525 if (!TARGET_P9_VECTOR)
1527 rtx mem_subreg, subreg, perm[16], pcv;
1528 rtx tmp = gen_reg_rtx (V16QImode);
1529 /* 3 is leftmost element in register */
1530 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1533 for (i = 0; i < 16; ++i)
1534 perm[i] = GEN_INT (reorder[i]);
1536 pcv = force_reg (V16QImode,
1537 gen_rtx_CONST_VECTOR (V16QImode,
1538 gen_rtvec_v (16, perm)));
1539 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1541 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1542 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1543 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1548 (define_insn "*vsx_st_elemrev_v16qi_internal"
1549 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1551 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1552 (parallel [(const_int 15) (const_int 14)
1553 (const_int 13) (const_int 12)
1554 (const_int 11) (const_int 10)
1555 (const_int 9) (const_int 8)
1556 (const_int 7) (const_int 6)
1557 (const_int 5) (const_int 4)
1558 (const_int 3) (const_int 2)
1559 (const_int 1) (const_int 0)])))]
1560 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1562 [(set_attr "type" "vecstore")])
;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insns for the traditional floating
;; point unit.
1568 (define_insn "*vsx_add<mode>3"
1569 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1570 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1571 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1572 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1573 "xvadd<sd>p %x0,%x1,%x2"
1574 [(set_attr "type" "<VStype_simple>")])
1576 (define_insn "*vsx_sub<mode>3"
1577 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa>")
1578 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1579 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1580 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1581 "xvsub<sd>p %x0,%x1,%x2"
1582 [(set_attr "type" "<VStype_simple>")])
1584 (define_insn "*vsx_mul<mode>3"
1585 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1586 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1587 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1588 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1589 "xvmul<sd>p %x0,%x1,%x2"
1590 [(set_attr "type" "<VStype_simple>")])
1592 ; Emulate vector with scalar for vec_mul in V2DImode
1593 (define_insn_and_split "vsx_mul_v2di"
1594 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1595 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1596 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1598 "VECTOR_MEM_VSX_P (V2DImode)"
1600 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1603 rtx op0 = operands[0];
1604 rtx op1 = operands[1];
1605 rtx op2 = operands[2];
1606 rtx op3 = gen_reg_rtx (DImode);
1607 rtx op4 = gen_reg_rtx (DImode);
1608 rtx op5 = gen_reg_rtx (DImode);
1609 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1610 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1611 if (TARGET_POWERPC64)
1612 emit_insn (gen_muldi3 (op5, op3, op4));
1615 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1616 emit_move_insn (op5, ret);
1618 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1619 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1620 if (TARGET_POWERPC64)
1621 emit_insn (gen_muldi3 (op3, op3, op4));
1624 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1625 emit_move_insn (op3, ret);
1627 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1630 [(set_attr "type" "mul")])
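
;; In other words (an illustrative sketch of what the split produces,
;; assuming a 64-bit target), multiplying two "vector long long" values
;;
;;   v2 = v0 * v1;
;;
;; becomes two vsx_extract_v2di/mulld pairs followed by a vsx_concat_v2di,
;; since pre-ISA 3.1 VSX has no doubleword integer vector multiply.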
1632 (define_insn "*vsx_div<mode>3"
1633 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1634 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1635 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1636 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1637 "xvdiv<sd>p %x0,%x1,%x2"
1638 [(set_attr "type" "<VStype_div>")])
1640 ; Emulate vector with scalar for vec_div in V2DImode
1641 (define_insn_and_split "vsx_div_v2di"
1642 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1643 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1644 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1646 "VECTOR_MEM_VSX_P (V2DImode)"
1648 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1651 rtx op0 = operands[0];
1652 rtx op1 = operands[1];
1653 rtx op2 = operands[2];
1654 rtx op3 = gen_reg_rtx (DImode);
1655 rtx op4 = gen_reg_rtx (DImode);
1656 rtx op5 = gen_reg_rtx (DImode);
1657 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1658 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1659 if (TARGET_POWERPC64)
1660 emit_insn (gen_divdi3 (op5, op3, op4));
1663 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1664 rtx target = emit_library_call_value (libfunc,
1665 op5, LCT_NORMAL, DImode,
1668 emit_move_insn (op5, target);
1670 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1671 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1672 if (TARGET_POWERPC64)
1673 emit_insn (gen_divdi3 (op3, op3, op4));
1676 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1677 rtx target = emit_library_call_value (libfunc,
1678 op3, LCT_NORMAL, DImode,
1681 emit_move_insn (op3, target);
1683 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1686 [(set_attr "type" "div")])
1688 (define_insn_and_split "vsx_udiv_v2di"
1689 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1690 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1691 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1693 "VECTOR_MEM_VSX_P (V2DImode)"
1695 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1698 rtx op0 = operands[0];
1699 rtx op1 = operands[1];
1700 rtx op2 = operands[2];
1701 rtx op3 = gen_reg_rtx (DImode);
1702 rtx op4 = gen_reg_rtx (DImode);
1703 rtx op5 = gen_reg_rtx (DImode);
1704 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1705 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1706 if (TARGET_POWERPC64)
1707 emit_insn (gen_udivdi3 (op5, op3, op4));
1710 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1711 rtx target = emit_library_call_value (libfunc,
1712 op5, LCT_NORMAL, DImode,
1715 emit_move_insn (op5, target);
1717 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1718 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1719 if (TARGET_POWERPC64)
1720 emit_insn (gen_udivdi3 (op3, op3, op4));
1723 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1724 rtx target = emit_library_call_value (libfunc,
1725 op3, LCT_NORMAL, DImode,
1728 emit_move_insn (op3, target);
1730 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1733 [(set_attr "type" "div")])
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
                      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<sd>p %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
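
;; The two expanders above differ only in how the CCFP result is read back:
;; the _fg form tests the FG (greater-than) bit with a gt:SI, the _fe form
;; tests the FE (equal) bit with an eq:SI.  A sketch of the generated RTL
;; (assumed shape; details are elided in this excerpt):
;;
;;   (set (reg:CCFP cc) (unspec:CCFP [op1 op2] ...))
;;   (set (reg:SI r) (gt:SI (reg:CCFP cc) (const_int 0)))
;;
;; so the caller receives a 0/1 value in a GPR.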
1772 (define_insn "vsx_fre<mode>2"
1773 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1774 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1776 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1778 [(set_attr "type" "<VStype_simple>")])
1780 (define_insn "*vsx_neg<mode>2"
1781 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1782 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1783 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1784 "xvneg<sd>p %x0,%x1"
1785 [(set_attr "type" "<VStype_simple>")])
1787 (define_insn "*vsx_abs<mode>2"
1788 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1789 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1790 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1791 "xvabs<sd>p %x0,%x1"
1792 [(set_attr "type" "<VStype_simple>")])
1794 (define_insn "vsx_nabs<mode>2"
1795 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1798 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1799 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1800 "xvnabs<sd>p %x0,%x1"
1801 [(set_attr "type" "<VStype_simple>")])
1803 (define_insn "vsx_smax<mode>3"
1804 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1805 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1806 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1807 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1808 "xvmax<sd>p %x0,%x1,%x2"
1809 [(set_attr "type" "<VStype_simple>")])
1811 (define_insn "*vsx_smin<mode>3"
1812 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1813 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1814 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1815 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1816 "xvmin<sd>p %x0,%x1,%x2"
1817 [(set_attr "type" "<VStype_simple>")])
1819 (define_insn "*vsx_sqrt<mode>2"
1820 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1821 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1822 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1823 "xvsqrt<sd>p %x0,%x1"
1824 [(set_attr "type" "<sd>sqrt")])
1826 (define_insn "*vsx_rsqrte<mode>2"
1827 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1828 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1830 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1831 "xvrsqrte<sd>p %x0,%x1"
1832 [(set_attr "type" "<VStype_simple>")])
;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<sd>p %0,%x1"
  [(set_attr "type" "<VStype_simple>")])
;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allows the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiplicand.
1873 (define_insn "*vsx_fmav4sf4"
1874 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1876 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1877 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1878 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1879 "VECTOR_UNIT_VSX_P (V4SFmode)"
1881 xvmaddasp %x0,%x1,%x2
1882 xvmaddmsp %x0,%x1,%x3
1883 vmaddfp %0,%1,%2,%3"
1884 [(set_attr "type" "vecfloat")])
1886 (define_insn "*vsx_fmav2df4"
1887 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1889 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1890 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1891 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1892 "VECTOR_UNIT_VSX_P (V2DFmode)"
1894 xvmaddadp %x0,%x1,%x2
1895 xvmaddmdp %x0,%x1,%x3"
1896 [(set_attr "type" "vecdouble")])
1898 (define_insn "*vsx_fms<mode>4"
1899 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1901 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1902 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1904 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1905 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1907 xvmsuba<sd>p %x0,%x1,%x2
1908 xvmsubm<sd>p %x0,%x1,%x3"
1909 [(set_attr "type" "<VStype_mul>")])
1911 (define_insn "*vsx_nfma<mode>4"
1912 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1915 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1916 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1917 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1918 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1920 xvnmadda<sd>p %x0,%x1,%x2
1921 xvnmaddm<sd>p %x0,%x1,%x3"
1922 [(set_attr "type" "<VStype_mul>")])
1924 (define_insn "*vsx_nfmsv4sf4"
1925 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1928 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1929 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1931 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1932 "VECTOR_UNIT_VSX_P (V4SFmode)"
1934 xvnmsubasp %x0,%x1,%x2
1935 xvnmsubmsp %x0,%x1,%x3
1936 vnmsubfp %0,%1,%2,%3"
1937 [(set_attr "type" "vecfloat")])
1939 (define_insn "*vsx_nfmsv2df4"
1940 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1943 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1944 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1946 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1947 "VECTOR_UNIT_VSX_P (V2DFmode)"
1949 xvnmsubadp %x0,%x1,%x2
1950 xvnmsubmdp %x0,%x1,%x3"
1951 [(set_attr "type" "vecdouble")])
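
;; For example (illustrative register allocation, not from the sources):
;; for a = b * c + a the allocator can keep everything in place with the
;; "A" form, while a = b * c + d with b dying afterwards lets the "M" form
;; overwrite b:
;;
;;   xvmaddadp vsA,vsB,vsC   # vsA = vsB * vsC + vsA
;;   xvmaddmdp vsB,vsC,vsD   # vsB = vsB * vsC + vsD
;;
;; which is why each alternative ties operand 0 to either the addend or a
;; multiplicand.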
;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_dup 1)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_dup 1)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
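
;; The dot forms set CR6, so the vec_all_*/vec_any_* predicates need no
;; separate compare.  As an illustration (a sketch using <altivec.h>):
;;
;;   int all_gt (vector double a, vector double b)
;;   {
;;     return vec_all_gt (a, b);
;;   }
;;
;; can compile to xvcmpgtdp. followed by a single CR6 test, with the mask
;; result in the vector destination available for free.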
2007 ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
2008 ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
2009 (define_insn "*xvtlsbb_internal"
2010 [(set (match_operand:CC 0 "cc_reg_operand" "=y")
2011 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2015 [(set_attr "type" "logical")])
2017 ;; Vector Test Least Significant Bit by Byte
2018 ;; for the implementation of the builtin
2019 ;; __builtin_vec_test_lsbb_all_ones
2020 ;; int vec_test_lsbb_all_ones (vector unsigned char);
2022 ;; __builtin_vec_test_lsbb_all_zeros
2023 ;; int vec_test_lsbb_all_zeros (vector unsigned char);
2024 (define_expand "xvtlsbbo"
2026 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2028 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2029 (lt:SI (match_dup 2) (const_int 0)))]
2032 operands[2] = gen_reg_rtx (CCmode);
2034 (define_expand "xvtlsbbz"
2036 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2038 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2039 (eq:SI (match_dup 2) (const_int 0)))]
2042 operands[2] = gen_reg_rtx (CCmode);
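/* Usage sketch for the two expanders above, using the prototypes quoted
   in the comment before them (illustrative only):

     #include <altivec.h>

     int ones (vector unsigned char v)
     {
       return vec_test_lsbb_all_ones (v);    // LSB of every byte is 1
     }

     int zeros (vector unsigned char v)
     {
       return vec_test_lsbb_all_zeros (v);   // LSB of every byte is 0
     }  */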
2045 (define_insn "*vsx_ge_<mode>_p"
2046 [(set (reg:CC CR6_REGNO)
2048 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2049 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2051 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2052 (ge:VSX_F (match_dup 1)
2054 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2055 "xvcmpge<sd>p. %x0,%x1,%x2"
2056 [(set_attr "type" "<VStype_simple>")])
2059 (define_insn "*vsx_xxsel<mode>"
2060 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2062 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2063 (match_operand:VSX_L 4 "zero_constant" ""))
2064 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2065 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2066 "VECTOR_MEM_VSX_P (<MODE>mode)"
2067 "xxsel %x0,%x3,%x2,%x1"
2068 [(set_attr "type" "vecmove")
2069 (set_attr "isa" "<VSisa>")])
2071 (define_insn "*vsx_xxsel<mode>_uns"
2072 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2074 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2075 (match_operand:VSX_L 4 "zero_constant" ""))
2076 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2077 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2078 "VECTOR_MEM_VSX_P (<MODE>mode)"
2079 "xxsel %x0,%x3,%x2,%x1"
2080 [(set_attr "type" "vecmove")
2081 (set_attr "isa" "<VSisa>")])
2084 (define_insn "vsx_copysign<mode>3"
2085 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2087 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2088 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2090 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2091 "xvcpsgn<sd>p %x0,%x2,%x1"
2092 [(set_attr "type" "<VStype_simple>")])
2094 ;; For the conversions, limit the register class for the integer value to be
2095 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2096 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2097 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2098 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2099 ;; in allowing virtual registers.
2100 (define_insn "vsx_float<VSi><mode>2"
2101 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2102 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2103 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2104 "xvcvsx<VSc><sd>p %x0,%x1"
2105 [(set_attr "type" "<VStype_simple>")])
2107 (define_insn "vsx_floatuns<VSi><mode>2"
2108 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2109 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2110 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2111 "xvcvux<VSc><sd>p %x0,%x1"
2112 [(set_attr "type" "<VStype_simple>")])
2114 (define_insn "vsx_fix_trunc<mode><VSi>2"
2115 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2116 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2117 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2118 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2119 [(set_attr "type" "<VStype_simple>")])
2121 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2122 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2123 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2124 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2125 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2126 [(set_attr "type" "<VStype_simple>")])
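;; Illustrative sketch (not part of the port): with GCC 9 or later, the
;; element-wise conversions implemented by the patterns above can be
;; written portably via __builtin_convertvector, e.g.:
;;
;;   typedef signed int v4si __attribute__ ((vector_size (16)));
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf to_float (v4si x)
;;   {
;;     return __builtin_convertvector (x, v4sf);	/* xvcvsxwsp */
;;   }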
2128 ;; Math rounding functions
2129 (define_insn "vsx_x<VSv>r<sd>pi"
2130 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2131 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2132 UNSPEC_VSX_ROUND_I))]
2133 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2134 "x<VSv>r<sd>pi %x0,%x1"
2135 [(set_attr "type" "<VStype_simple>")])
2137 (define_insn "vsx_x<VSv>r<sd>pic"
2138 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2139 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2140 UNSPEC_VSX_ROUND_IC))]
2141 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2142 "x<VSv>r<sd>pic %x0,%x1"
2143 [(set_attr "type" "<VStype_simple>")])
2145 (define_insn "vsx_btrunc<mode>2"
2146 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2147 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2148 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2149 "xvr<sd>piz %x0,%x1"
2150 [(set_attr "type" "<VStype_simple>")])
2152 (define_insn "*vsx_b2trunc<mode>2"
2153 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2154 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2156 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2157 "x<VSv>r<sd>piz %x0,%x1"
2158 [(set_attr "type" "<VStype_simple>")])
2160 (define_insn "vsx_floor<mode>2"
2161 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2162 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2164 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2165 "xvr<sd>pim %x0,%x1"
2166 [(set_attr "type" "<VStype_simple>")])
2168 (define_insn "vsx_ceil<mode>2"
2169 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2170 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2172 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2173 "xvr<sd>pip %x0,%x1"
2174 [(set_attr "type" "<VStype_simple>")])
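;; Usage sketch (assuming the usual altivec.h bindings): the rounding
;; patterns above back the vec_floor/vec_ceil/vec_trunc intrinsics.
;;
;;   #include <altivec.h>
;;
;;   vector double down (vector double x) { return vec_floor (x); } /* xvrdpim */
;;   vector double up   (vector double x) { return vec_ceil (x);  } /* xvrdpip */
;;   vector double chop (vector double x) { return vec_trunc (x); } /* xvrdpiz */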
2177 ;; VSX convert to/from double vector
2179 ;; Convert between single and double precision
2180 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2181 ;; scalar single precision instructions internally use the double format.
2182 ;; Prefer the altivec registers, since we likely will need to do a vperm
2183 (define_insn "vsx_xscvdpsp"
2184 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2185 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2186 UNSPEC_VSX_CVSPDP))]
2187 "VECTOR_UNIT_VSX_P (DFmode)"
2189 [(set_attr "type" "fp")])
2191 (define_insn "vsx_xvcvspdp_be"
2192 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2194 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2195 (parallel [(const_int 0) (const_int 2)]))))]
2196 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2198 [(set_attr "type" "vecdouble")])
2200 (define_insn "vsx_xvcvspdp_le"
2201 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2203 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2204 (parallel [(const_int 1) (const_int 3)]))))]
2205 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2207 [(set_attr "type" "vecdouble")])
2209 (define_expand "vsx_xvcvspdp"
2210 [(match_operand:V2DF 0 "vsx_register_operand")
2211 (match_operand:V4SF 1 "vsx_register_operand")]
2212 "VECTOR_UNIT_VSX_P (V4SFmode)"
2214 if (BYTES_BIG_ENDIAN)
2215 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2217 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2221 (define_insn "vsx_xvcvdpsp"
2222 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2223 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2224 UNSPEC_VSX_CVSPDP))]
2225 "VECTOR_UNIT_VSX_P (V2DFmode)"
2227 [(set_attr "type" "vecdouble")])
;; xscvspdp, representing the scalar SF type as V4SF
2230 (define_insn "vsx_xscvspdp"
2231 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2232 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2233 UNSPEC_VSX_CVSPDP))]
2234 "VECTOR_UNIT_VSX_P (V4SFmode)"
2236 [(set_attr "type" "fp")])
2238 ;; Same as vsx_xscvspdp, but use SF as the type
2239 (define_insn "vsx_xscvspdp_scalar2"
2240 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2241 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2242 UNSPEC_VSX_CVSPDP))]
2243 "VECTOR_UNIT_VSX_P (V4SFmode)"
2245 [(set_attr "type" "fp")])
2247 ;; Generate xvcvhpsp instruction
2248 (define_insn "vsx_xvcvhpsp"
2249 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
(unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2251 UNSPEC_VSX_CVHPSP))]
2254 [(set_attr "type" "vecfloat")])
2256 ;; Generate xvcvsphp
2257 (define_insn "vsx_xvcvsphp"
2258 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2259 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2260 UNSPEC_VSX_XVCVSPHP))]
2263 [(set_attr "type" "vecfloat")])
2265 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2266 ;; format of scalars is actually DF.
2267 (define_insn "vsx_xscvdpsp_scalar"
2268 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2269 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2270 UNSPEC_VSX_CVSPDP))]
2271 "VECTOR_UNIT_VSX_P (V4SFmode)"
2273 [(set_attr "type" "fp")])
;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2276 (define_insn "vsx_xscvdpspn"
2277 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2278 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2279 UNSPEC_VSX_CVDPSPN))]
2282 [(set_attr "type" "fp")])
2284 (define_insn "vsx_xscvspdpn"
2285 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2286 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2287 UNSPEC_VSX_CVSPDPN))]
2290 [(set_attr "type" "fp")])
2292 (define_insn "vsx_xscvdpspn_scalar"
2293 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2294 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2295 UNSPEC_VSX_CVDPSPN))]
2298 [(set_attr "type" "fp")])
;; Used by direct move to move an SFmode value from a GPR to a VSX register
2301 (define_insn "vsx_xscvspdpn_directmove"
2302 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2303 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2304 UNSPEC_VSX_CVSPDPN))]
2307 [(set_attr "type" "fp")])
2309 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2311 (define_insn "vsx_xvcv<su>xwsp"
2312 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2313 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2314 "VECTOR_UNIT_VSX_P (V4SFmode)"
2315 "xvcv<su>xwsp %x0,%x1"
2316 [(set_attr "type" "vecfloat")])
2318 (define_insn "vsx_xvcv<su>xddp"
2319 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2320 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2321 "VECTOR_UNIT_VSX_P (V2DFmode)"
2322 "xvcv<su>xddp %x0,%x1"
2323 [(set_attr "type" "vecdouble")])
2325 (define_insn "vsx_xvcvsp<su>xws"
2326 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2327 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2328 "VECTOR_UNIT_VSX_P (V4SFmode)"
2329 "xvcvsp<su>xws %x0,%x1"
2330 [(set_attr "type" "vecfloat")])
2332 (define_insn "vsx_xvcvdp<su>xds"
2333 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2334 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2335 "VECTOR_UNIT_VSX_P (V2DFmode)"
2336 "xvcvdp<su>xds %x0,%x1"
2337 [(set_attr "type" "vecdouble")])
2339 (define_expand "vsx_xvcvsxddp_scale"
2340 [(match_operand:V2DF 0 "vsx_register_operand")
2341 (match_operand:V2DI 1 "vsx_register_operand")
2342 (match_operand:QI 2 "immediate_operand")]
2343 "VECTOR_UNIT_VSX_P (V2DFmode)"
2345 rtx op0 = operands[0];
2346 rtx op1 = operands[1];
int scale = INTVAL (operands[2]);
2348 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2350 rs6000_scale_v2df (op0, op0, -scale);
2354 (define_expand "vsx_xvcvuxddp_scale"
2355 [(match_operand:V2DF 0 "vsx_register_operand")
2356 (match_operand:V2DI 1 "vsx_register_operand")
2357 (match_operand:QI 2 "immediate_operand")]
2358 "VECTOR_UNIT_VSX_P (V2DFmode)"
2360 rtx op0 = operands[0];
2361 rtx op1 = operands[1];
int scale = INTVAL (operands[2]);
2363 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2365 rs6000_scale_v2df (op0, op0, -scale);
2369 (define_expand "vsx_xvcvdpsxds_scale"
2370 [(match_operand:V2DI 0 "vsx_register_operand")
2371 (match_operand:V2DF 1 "vsx_register_operand")
2372 (match_operand:QI 2 "immediate_operand")]
2373 "VECTOR_UNIT_VSX_P (V2DFmode)"
2375 rtx op0 = operands[0];
2376 rtx op1 = operands[1];
2378 int scale = INTVAL (operands[2]);
2383 tmp = gen_reg_rtx (V2DFmode);
2384 rs6000_scale_v2df (tmp, op1, scale);
2386 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
;; Convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integers
2392 (define_expand "vsx_xvcvdpuxds_scale"
2393 [(match_operand:V2DI 0 "vsx_register_operand")
2394 (match_operand:V2DF 1 "vsx_register_operand")
2395 (match_operand:QI 2 "immediate_operand")]
2396 "VECTOR_UNIT_VSX_P (V2DFmode)"
2398 rtx op0 = operands[0];
2399 rtx op1 = operands[1];
2401 int scale = INTVAL (operands[2]);
2406 tmp = gen_reg_rtx (V2DFmode);
2407 rs6000_scale_v2df (tmp, op1, scale);
2409 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
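/* Usage sketch for the convert-and-scale expanders (a hedged example,
   assuming the usual altivec.h bindings of vec_ctf/vec_cts/vec_ctu for
   vector long long and vector double):

     #include <altivec.h>

     vector double from_fixed (vector signed long long x)
     {
       return vec_ctf (x, 4);   // (double) x[i] * 2**-4
     }  */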
2413 ;; Convert from 64-bit to 32-bit types
2414 ;; Note, favor the Altivec registers since the usual use of these instructions
2415 ;; is in vector converts and we need to use the Altivec vperm instruction.
2417 (define_insn "vsx_xvcvdpsxws"
2418 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2419 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2420 UNSPEC_VSX_CVDPSXWS))]
2421 "VECTOR_UNIT_VSX_P (V2DFmode)"
2422 "xvcvdpsxws %x0,%x1"
2423 [(set_attr "type" "vecdouble")])
2425 (define_insn "vsx_xvcvdpuxws"
2426 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2427 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2428 UNSPEC_VSX_CVDPUXWS))]
2429 "VECTOR_UNIT_VSX_P (V2DFmode)"
2430 "xvcvdpuxws %x0,%x1"
2431 [(set_attr "type" "vecdouble")])
2433 (define_insn "vsx_xvcvsxdsp"
2434 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2435 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2436 UNSPEC_VSX_CVSXDSP))]
2437 "VECTOR_UNIT_VSX_P (V2DFmode)"
2439 [(set_attr "type" "vecfloat")])
2441 (define_insn "vsx_xvcvuxdsp"
2442 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2443 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2444 UNSPEC_VSX_CVUXDSP))]
2445 "VECTOR_UNIT_VSX_P (V2DFmode)"
2447 [(set_attr "type" "vecdouble")])
2449 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2450 ;; 64-bit floating point numbers.
2451 (define_insn "vsx_xvcv<su>xwdp_be"
2452 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2454 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2455 (parallel [(const_int 0) (const_int 2)]))))]
2456 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2457 "xvcv<su>xwdp %x0,%x1"
2458 [(set_attr "type" "vecdouble")])
2460 (define_insn "vsx_xvcv<su>xwdp_le"
2461 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2463 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2464 (parallel [(const_int 1) (const_int 3)]))))]
2465 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2466 "xvcv<su>xwdp %x0,%x1"
2467 [(set_attr "type" "vecdouble")])
2469 (define_expand "vsx_xvcv<su>xwdp"
2470 [(match_operand:V2DF 0 "vsx_register_operand")
2471 (match_operand:V4SI 1 "vsx_register_operand")
2473 "VECTOR_UNIT_VSX_P (V2DFmode)"
2475 if (BYTES_BIG_ENDIAN)
2476 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2478 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2482 (define_insn "vsx_xvcvsxwdp_df"
2483 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2484 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2485 UNSPEC_VSX_CVSXWDP))]
2488 [(set_attr "type" "vecdouble")])
2490 (define_insn "vsx_xvcvuxwdp_df"
2491 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2492 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2493 UNSPEC_VSX_CVUXWDP))]
2496 [(set_attr "type" "vecdouble")])
2498 ;; Convert vector of 32-bit floating point numbers to vector of
2499 ;; 64-bit signed/unsigned integers.
2500 (define_insn "vsx_xvcvsp<su>xds_be"
2501 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2503 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2504 (parallel [(const_int 0) (const_int 2)]))))]
2505 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2506 "xvcvsp<su>xds %x0,%x1"
2507 [(set_attr "type" "vecdouble")])
2509 (define_insn "vsx_xvcvsp<su>xds_le"
2510 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2512 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2513 (parallel [(const_int 1) (const_int 3)]))))]
2514 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2515 "xvcvsp<su>xds %x0,%x1"
2516 [(set_attr "type" "vecdouble")])
2518 (define_expand "vsx_xvcvsp<su>xds"
2519 [(match_operand:V2DI 0 "vsx_register_operand")
2520 (match_operand:V4SF 1 "vsx_register_operand")
2522 "VECTOR_UNIT_VSX_P (V2DFmode)"
2524 if (BYTES_BIG_ENDIAN)
2525 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2527 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
;; Generate float2_v2df
;; convert two vectors of double to a vector of float
2533 (define_expand "float2_v2df"
2534 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2535 (use (match_operand:V2DF 1 "register_operand" "wa"))
2536 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2537 "VECTOR_UNIT_VSX_P (V4SFmode)"
2539 rtx rtx_src1, rtx_src2, rtx_dst;
2541 rtx_dst = operands[0];
2542 rtx_src1 = operands[1];
2543 rtx_src2 = operands[2];
2545 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2550 ;; convert two long long signed ints to float
2551 (define_expand "float2_v2di"
2552 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2553 (use (match_operand:V2DI 1 "register_operand" "wa"))
2554 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2555 "VECTOR_UNIT_VSX_P (V4SFmode)"
2557 rtx rtx_src1, rtx_src2, rtx_dst;
2559 rtx_dst = operands[0];
2560 rtx_src1 = operands[1];
2561 rtx_src2 = operands[2];
2563 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2567 ;; Generate uns_float2
2568 ;; convert two long long unsigned ints to float
2569 (define_expand "uns_float2_v2di"
2570 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2571 (use (match_operand:V2DI 1 "register_operand" "wa"))
2572 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2573 "VECTOR_UNIT_VSX_P (V4SFmode)"
2575 rtx rtx_src1, rtx_src2, rtx_dst;
2577 rtx_dst = operands[0];
2578 rtx_src1 = operands[1];
2579 rtx_src2 = operands[2];
rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
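/* Usage sketch for the float2/uns_float2 expanders (illustrative,
   assuming the vec_float2 binding from altivec.h):

     #include <altivec.h>

     vector float pack4 (vector signed long long a,
                         vector signed long long b)
     {
       return vec_float2 (a, b);   // four floats from 2 + 2 long longs
     }  */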
2586 ;; convert double or long long signed to float
2587 ;; (Only even words are valid, BE numbering)
2588 (define_expand "floate<mode>"
2589 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2590 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2591 "VECTOR_UNIT_VSX_P (V4SFmode)"
2593 if (BYTES_BIG_ENDIAN)
/* Shift left one word to put the even words in the correct location.  */
2597 rtx rtx_val = GEN_INT (4);
2599 rtx_tmp = gen_reg_rtx (V4SFmode);
2600 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2601 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2602 rtx_tmp, rtx_tmp, rtx_val));
2605 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2610 ;; Generate uns_floate
2611 ;; convert long long unsigned to float
2612 ;; (Only even words are valid, BE numbering)
2613 (define_expand "unsfloatev2di"
2614 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2615 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2616 "VECTOR_UNIT_VSX_P (V4SFmode)"
2618 if (BYTES_BIG_ENDIAN)
/* Shift left one word to put the even words in the correct location.  */
2622 rtx rtx_val = GEN_INT (4);
2624 rtx_tmp = gen_reg_rtx (V4SFmode);
2625 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2626 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2627 rtx_tmp, rtx_tmp, rtx_val));
2630 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2636 ;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
2638 (define_expand "floato<mode>"
2639 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2640 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2641 "VECTOR_UNIT_VSX_P (V4SFmode)"
2643 if (BYTES_BIG_ENDIAN)
2644 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
/* Shift left one word to put the odd words in the correct location.  */
2649 rtx rtx_val = GEN_INT (4);
2651 rtx_tmp = gen_reg_rtx (V4SFmode);
2652 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2653 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2654 rtx_tmp, rtx_tmp, rtx_val));
2659 ;; Generate uns_floato
2660 ;; convert long long unsigned to float
2661 ;; (Only odd words are valid, BE numbering)
2662 (define_expand "unsfloatov2di"
2663 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2664 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2665 "VECTOR_UNIT_VSX_P (V4SFmode)"
2667 if (BYTES_BIG_ENDIAN)
2668 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
/* Shift left one word to put the odd words in the correct location.  */
2673 rtx rtx_val = GEN_INT (4);
2675 rtx_tmp = gen_reg_rtx (V4SFmode);
2676 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2677 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2678 rtx_tmp, rtx_tmp, rtx_val));
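/* Usage sketch for the floate/floato expanders above (illustrative,
   assuming the vec_floate/vec_floato bindings from altivec.h).  Only
   half of the V4SF result words carry converted values; the others
   are undefined:

     #include <altivec.h>

     vector float evens (vector double d) { return vec_floate (d); }
     vector float odds  (vector double d) { return vec_floato (d); }  */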
2683 ;; Generate vsigned2
2684 ;; convert two double float vectors to a vector of single precision ints
2685 (define_expand "vsigned2_v2df"
2686 [(match_operand:V4SI 0 "register_operand" "=wa")
2687 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2688 (match_operand:V2DF 2 "register_operand" "wa")]
2689 UNSPEC_VSX_VSIGNED2)]
2692 rtx rtx_src1, rtx_src2, rtx_dst;
bool signed_convert = true;
2695 rtx_dst = operands[0];
2696 rtx_src1 = operands[1];
2697 rtx_src2 = operands[2];
2699 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2703 ;; Generate vsignedo_v2df
;; signed double float to int conversion, odd words (BE numbering)
2705 (define_expand "vsignedo_v2df"
2706 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2707 (match_operand:V2DF 1 "register_operand" "wa"))]
2710 if (BYTES_BIG_ENDIAN)
2713 rtx rtx_val = GEN_INT (12);
2714 rtx_tmp = gen_reg_rtx (V4SImode);
2716 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2718 /* Big endian word numbering for words in operand is 0 1 2 3.
Take (operand[1] operand[1]) and shift left one word:
0 1 2 3 0 1 2 3 => 1 2 3 0
Words 1 and 3 are now where they need to be for the result.  */
2723 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2727 /* Little endian word numbering for operand is 3 2 1 0.
2728 Result words 3 and 1 are where they need to be. */
2729 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2733 [(set_attr "type" "veccomplex")])
2735 ;; Generate vsignede_v2df
;; signed double float to int conversion, even words (BE numbering)
2737 (define_expand "vsignede_v2df"
2738 [(set (match_operand:V4SI 0 "register_operand" "=v")
2739 (match_operand:V2DF 1 "register_operand" "v"))]
2742 if (BYTES_BIG_ENDIAN)
/* Big endian word numbering for the result is 0 1 2 3.
Result words 0 and 2 are already where they need to be.  */
2745 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2750 rtx rtx_val = GEN_INT (12);
2751 rtx_tmp = gen_reg_rtx (V4SImode);
2753 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2755 /* Little endian word numbering for operand is 3 2 1 0.
Take (operand[1] operand[1]) and shift left three words:
2757 0 1 2 3 0 1 2 3 => 3 0 1 2
2758 Words 0 and 2 are now where they need to be for the result. */
2759 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2764 [(set_attr "type" "veccomplex")])
;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
2769 (define_expand "vunsigned2_v2df"
2770 [(match_operand:V4SI 0 "register_operand" "=v")
2771 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2772 (match_operand:V2DF 2 "register_operand" "v")]
2773 UNSPEC_VSX_VSIGNED2)]
2776 rtx rtx_src1, rtx_src2, rtx_dst;
bool signed_convert = false;
2779 rtx_dst = operands[0];
2780 rtx_src1 = operands[1];
2781 rtx_src2 = operands[2];
2783 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2787 ;; Generate vunsignedo_v2df
;; unsigned double float to int conversion, odd words (BE numbering)
2789 (define_expand "vunsignedo_v2df"
2790 [(set (match_operand:V4SI 0 "register_operand" "=v")
2791 (match_operand:V2DF 1 "register_operand" "v"))]
2794 if (BYTES_BIG_ENDIAN)
2797 rtx rtx_val = GEN_INT (12);
2798 rtx_tmp = gen_reg_rtx (V4SImode);
2800 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2802 /* Big endian word numbering for words in operand is 0 1 2 3.
Take (operand[1] operand[1]) and shift left one word:
0 1 2 3 0 1 2 3 => 1 2 3 0
Words 1 and 3 are now where they need to be for the result.  */
2807 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2811 /* Little endian word numbering for operand is 3 2 1 0.
2812 Result words 3 and 1 are where they need to be. */
2813 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2817 [(set_attr "type" "veccomplex")])
2819 ;; Generate vunsignede_v2df
;; unsigned double float to int conversion, even words (BE numbering)
2821 (define_expand "vunsignede_v2df"
2822 [(set (match_operand:V4SI 0 "register_operand" "=v")
2823 (match_operand:V2DF 1 "register_operand" "v"))]
2826 if (BYTES_BIG_ENDIAN)
/* Big endian word numbering for the result is 0 1 2 3.
Result words 0 and 2 are already where they need to be.  */
2829 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2834 rtx rtx_val = GEN_INT (12);
2835 rtx_tmp = gen_reg_rtx (V4SImode);
2837 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2839 /* Little endian word numbering for operand is 3 2 1 0.
Take (operand[1] operand[1]) and shift left three words:
2841 0 1 2 3 0 1 2 3 => 3 0 1 2
2842 Words 0 and 2 are now where they need to be for the result. */
2843 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2848 [(set_attr "type" "veccomplex")])
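;; Usage sketch (assuming the vec_signed2/vec_unsigned2 bindings from
;; altivec.h): the vsigned2/vunsigned2 expanders pack the converted
;; words of two V2DF inputs into a single V4SI result.
;;
;;   #include <altivec.h>
;;
;;   vector signed int s2 (vector double a, vector double b)
;;   {
;;     return vec_signed2 (a, b);	/* 4 ints from 2 + 2 doubles */
;;   }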
;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
2852 ;; point value is < LONG_MIN or > LONG_MAX.
2853 (define_insn "*vsx_float_fix_v2df2"
2854 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2857 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2859 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2860 && !flag_trapping_math && TARGET_FRIZ"
2862 [(set_attr "type" "vecdouble")])
2865 ;; Permute operations
2867 ;; Build a V2DF/V2DI vector from two scalars
2868 (define_insn "vsx_concat_<mode>"
2869 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2871 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2872 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2873 "VECTOR_MEM_VSX_P (<MODE>mode)"
2875 if (which_alternative == 0)
2876 return (BYTES_BIG_ENDIAN
2877 ? "xxpermdi %x0,%x1,%x2,0"
2878 : "xxpermdi %x0,%x2,%x1,0");
2880 else if (which_alternative == 1)
2881 return (BYTES_BIG_ENDIAN
2882 ? "mtvsrdd %x0,%1,%2"
2883 : "mtvsrdd %x0,%2,%1");
2888 [(set_attr "type" "vecperm,vecmove")])
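;; Illustration: vsx_concat_<mode> is the pattern behind building a
;; two-element vector from scalars with GCC's generic vector extension,
;; e.g.:
;;
;;   typedef double v2df __attribute__ ((vector_size (16)));
;;
;;   v2df pair (double a, double b)
;;   {
;;     return (v2df) { a, b };	/* xxpermdi or mtvsrdd */
;;   }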
2890 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2891 ;; word element in a vector register.
2892 (define_insn "*vsx_concat_<mode>_1"
2893 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2895 (vec_select:<VS_scalar>
2896 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2897 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2898 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2899 "VECTOR_MEM_VSX_P (<MODE>mode)"
2901 HOST_WIDE_INT dword = INTVAL (operands[2]);
2902 if (BYTES_BIG_ENDIAN)
operands[4] = GEN_INT (2 * dword);
2905 return "xxpermdi %x0,%x1,%x3,%4";
2909 operands[4] = GEN_INT (!dword);
2910 return "xxpermdi %x0,%x3,%x1,%4";
2913 [(set_attr "type" "vecperm")])
2915 (define_insn "*vsx_concat_<mode>_2"
2916 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2918 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2919 (vec_select:<VS_scalar>
2920 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2921 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2922 "VECTOR_MEM_VSX_P (<MODE>mode)"
2924 HOST_WIDE_INT dword = INTVAL (operands[3]);
2925 if (BYTES_BIG_ENDIAN)
2927 operands[4] = GEN_INT (dword);
2928 return "xxpermdi %x0,%x1,%x2,%4";
2932 operands[4] = GEN_INT (2 * !dword);
2933 return "xxpermdi %x0,%x2,%x1,%4";
2936 [(set_attr "type" "vecperm")])
2938 (define_insn "*vsx_concat_<mode>_3"
2939 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2941 (vec_select:<VS_scalar>
2942 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2943 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2944 (vec_select:<VS_scalar>
2945 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2946 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2947 "VECTOR_MEM_VSX_P (<MODE>mode)"
2949 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2950 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2951 if (BYTES_BIG_ENDIAN)
2953 operands[5] = GEN_INT ((2 * dword1) + dword2);
2954 return "xxpermdi %x0,%x1,%x3,%5";
2958 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2959 return "xxpermdi %x0,%x3,%x1,%5";
2962 [(set_attr "type" "vecperm")])
2964 ;; Special purpose concat using xxpermdi to glue two single precision values
2965 ;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats.
2967 (define_insn "vsx_concat_v2sf"
2968 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2970 [(match_operand:SF 1 "vsx_register_operand" "wa")
2971 (match_operand:SF 2 "vsx_register_operand" "wa")]
2972 UNSPEC_VSX_CONCAT))]
2973 "VECTOR_MEM_VSX_P (V2DFmode)"
2975 if (BYTES_BIG_ENDIAN)
2976 return "xxpermdi %x0,%x1,%x2,0";
2978 return "xxpermdi %x0,%x2,%x1,0";
2980 [(set_attr "type" "vecperm")])
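;; Sketch of the C-level construct this serves (using GCC's generic
;; vector extension), so the four floats need not bounce through memory:
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf quad (float a, float b, float c, float d)
;;   {
;;     return (v4sf) { a, b, c, d };
;;   }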
2982 ;; Concatenate 4 SImode elements into a V4SImode reg.
2983 (define_expand "vsx_init_v4si"
2984 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2985 (use (match_operand:SI 1 "gpc_reg_operand"))
2986 (use (match_operand:SI 2 "gpc_reg_operand"))
2987 (use (match_operand:SI 3 "gpc_reg_operand"))
2988 (use (match_operand:SI 4 "gpc_reg_operand"))]
2989 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2991 rtx a = gen_reg_rtx (DImode);
2992 rtx b = gen_reg_rtx (DImode);
2993 rtx c = gen_reg_rtx (DImode);
2994 rtx d = gen_reg_rtx (DImode);
2995 emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2996 emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2997 emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2998 emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2999 if (!BYTES_BIG_ENDIAN)
3005 rtx aa = gen_reg_rtx (DImode);
3006 rtx ab = gen_reg_rtx (DImode);
3007 rtx cc = gen_reg_rtx (DImode);
3008 rtx cd = gen_reg_rtx (DImode);
3009 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
3010 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
3011 emit_insn (gen_iordi3 (ab, aa, b));
3012 emit_insn (gen_iordi3 (cd, cc, d));
3014 rtx abcd = gen_reg_rtx (V2DImode);
3015 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
3016 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
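/* In scalar terms the shift/or sequence above computes

     unsigned long long ab = ((unsigned long long) a << 32) | b;
     unsigned long long cd = ((unsigned long long) c << 32) | d;

   (with a/b and c/d interchanged for little endian); vsx_concat_v2di
   then glues AB and CD into the final 128-bit value.  */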
3020 ;; xxpermdi for little endian loads and stores. We need several of
3021 ;; these since the form of the PARALLEL differs by mode.
3022 (define_insn "*vsx_xxpermdi2_le_<mode>"
3023 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3025 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3026 (parallel [(const_int 1) (const_int 0)])))]
3027 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3028 "xxpermdi %x0,%x1,%x1,2"
3029 [(set_attr "type" "vecperm")])
3031 (define_insn "xxswapd_v16qi"
3032 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3034 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3035 (parallel [(const_int 8) (const_int 9)
3036 (const_int 10) (const_int 11)
3037 (const_int 12) (const_int 13)
3038 (const_int 14) (const_int 15)
3039 (const_int 0) (const_int 1)
3040 (const_int 2) (const_int 3)
3041 (const_int 4) (const_int 5)
3042 (const_int 6) (const_int 7)])))]
3044 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3045 ;; mnemonic xxpermdi instead.
3046 "xxpermdi %x0,%x1,%x1,2"
3047 [(set_attr "type" "vecperm")])
3049 (define_insn "xxswapd_v8hi"
3050 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3052 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3053 (parallel [(const_int 4) (const_int 5)
3054 (const_int 6) (const_int 7)
3055 (const_int 0) (const_int 1)
3056 (const_int 2) (const_int 3)])))]
3058 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3059 ;; mnemonic xxpermdi instead.
3060 "xxpermdi %x0,%x1,%x1,2"
3061 [(set_attr "type" "vecperm")])
3063 (define_insn "xxswapd_<mode>"
3064 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3066 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3067 (parallel [(const_int 2) (const_int 3)
3068 (const_int 0) (const_int 1)])))]
;; AIX does not support the extended mnemonic xxswapd. Use the basic
3071 ;; mnemonic xxpermdi instead.
3072 "xxpermdi %x0,%x1,%x1,2"
3073 [(set_attr "type" "vecperm")])
3075 (define_insn "xxswapd_<mode>"
3076 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3078 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3079 (parallel [(const_int 1) (const_int 0)])))]
;; AIX does not support the extended mnemonic xxswapd. Use the basic
3082 ;; mnemonic xxpermdi instead.
3083 "xxpermdi %x0,%x1,%x1,2"
3084 [(set_attr "type" "vecperm")])
3086 (define_insn "xxgenpcvm_<mode>_internal"
3087 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3088 (unspec:VSX_EXTRACT_I4
3089 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3090 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3092 "TARGET_POWER10 && TARGET_64BIT"
3093 "xxgenpcv<wd>m %x0,%1,%2"
3094 [(set_attr "type" "vecsimple")])
3096 (define_expand "xxgenpcvm_<mode>"
3097 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3098 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3099 (use (match_operand:QI 2 "immediate_operand"))]
3102 if (!BYTES_BIG_ENDIAN)
/* gen_xxgenpcvm assumes Big Endian order.  If LE,
swap the upper and lower double words.  */
3106 rtx tmp = gen_reg_rtx (<MODE>mode);
3108 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3111 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3116 ;; lxvd2x for little endian loads. We need several of
3117 ;; these since the form of the PARALLEL differs by mode.
3118 (define_insn "*vsx_lxvd2x2_le_<mode>"
3119 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3121 (match_operand:VSX_D 1 "memory_operand" "Z")
3122 (parallel [(const_int 1) (const_int 0)])))]
3123 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3125 [(set_attr "type" "vecload")])
3127 (define_insn "*vsx_lxvd2x4_le_<mode>"
3128 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3130 (match_operand:VSX_W 1 "memory_operand" "Z")
3131 (parallel [(const_int 2) (const_int 3)
3132 (const_int 0) (const_int 1)])))]
3133 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3135 [(set_attr "type" "vecload")])
3137 (define_insn "*vsx_lxvd2x8_le_V8HI"
3138 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3140 (match_operand:V8HI 1 "memory_operand" "Z")
3141 (parallel [(const_int 4) (const_int 5)
3142 (const_int 6) (const_int 7)
3143 (const_int 0) (const_int 1)
3144 (const_int 2) (const_int 3)])))]
3145 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3147 [(set_attr "type" "vecload")])
3149 (define_insn "*vsx_lxvd2x16_le_V16QI"
3150 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3152 (match_operand:V16QI 1 "memory_operand" "Z")
3153 (parallel [(const_int 8) (const_int 9)
3154 (const_int 10) (const_int 11)
3155 (const_int 12) (const_int 13)
3156 (const_int 14) (const_int 15)
3157 (const_int 0) (const_int 1)
3158 (const_int 2) (const_int 3)
3159 (const_int 4) (const_int 5)
3160 (const_int 6) (const_int 7)])))]
3161 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3163 [(set_attr "type" "vecload")])
3165 ;; stxvd2x for little endian stores. We need several of
3166 ;; these since the form of the PARALLEL differs by mode.
3167 (define_insn "*vsx_stxvd2x2_le_<mode>"
3168 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3170 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3171 (parallel [(const_int 1) (const_int 0)])))]
3172 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3174 [(set_attr "type" "vecstore")])
3176 (define_insn "*vsx_stxvd2x4_le_<mode>"
3177 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3179 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3180 (parallel [(const_int 2) (const_int 3)
3181 (const_int 0) (const_int 1)])))]
3182 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3184 [(set_attr "type" "vecstore")])
3186 (define_insn "*vsx_stxvd2x8_le_V8HI"
3187 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3189 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3190 (parallel [(const_int 4) (const_int 5)
3191 (const_int 6) (const_int 7)
3192 (const_int 0) (const_int 1)
3193 (const_int 2) (const_int 3)])))]
3194 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3196 [(set_attr "type" "vecstore")])
3198 (define_insn "*vsx_stxvd2x16_le_V16QI"
3199 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3201 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3202 (parallel [(const_int 8) (const_int 9)
3203 (const_int 10) (const_int 11)
3204 (const_int 12) (const_int 13)
3205 (const_int 14) (const_int 15)
3206 (const_int 0) (const_int 1)
3207 (const_int 2) (const_int 3)
3208 (const_int 4) (const_int 5)
3209 (const_int 6) (const_int 7)])))]
3210 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3212 [(set_attr "type" "vecstore")])
3214 ;; Convert a TImode value into V1TImode
3215 (define_expand "vsx_set_v1ti"
3216 [(match_operand:V1TI 0 "nonimmediate_operand")
3217 (match_operand:V1TI 1 "nonimmediate_operand")
3218 (match_operand:TI 2 "input_operand")
3219 (match_operand:QI 3 "u5bit_cint_operand")]
3220 "VECTOR_MEM_VSX_P (V1TImode)"
3222 if (operands[3] != const0_rtx)
3225 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3229 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3230 (define_expand "vsx_set_<mode>"
3231 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3232 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3233 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3234 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3235 "VECTOR_MEM_VSX_P (<MODE>mode)"
3237 rtx dest = operands[0];
3238 rtx vec_reg = operands[1];
3239 rtx value = operands[2];
3240 rtx ele = operands[3];
3241 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3243 if (ele == const0_rtx)
3245 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3246 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3249 else if (ele == const1_rtx)
3251 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3252 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3259 ;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move.
3261 ;; Or see if we can avoid doing the move at all
3263 ;; There are some unresolved problems with reload that show up if an Altivec
3264 ;; register was picked. Limit the scalar value to FPRs for now.
3266 (define_insn "vsx_extract_<mode>"
3267 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3268 (vec_select:<VS_scalar>
3269 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3271 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3272 "VECTOR_MEM_VSX_P (<MODE>mode)"
3274 int element = INTVAL (operands[2]);
3275 int op0_regno = REGNO (operands[0]);
3276 int op1_regno = REGNO (operands[1]);
3279 gcc_assert (IN_RANGE (element, 0, 1));
3280 gcc_assert (VSX_REGNO_P (op1_regno));
3282 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3284 if (op0_regno == op1_regno)
3285 return ASM_COMMENT_START " vec_extract to same register";
3287 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3288 && TARGET_POWERPC64)
3289 return "mfvsrd %0,%x1";
3291 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3294 else if (VSX_REGNO_P (op0_regno))
3295 return "xxlor %x0,%x1,%x1";
3301 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3302 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3303 return "mfvsrld %0,%x1";
3305 else if (VSX_REGNO_P (op0_regno))
3307 fldDM = element << 1;
3308 if (!BYTES_BIG_ENDIAN)
3310 operands[3] = GEN_INT (fldDM);
3311 return "xxpermdi %x0,%x1,%x1,%3";
3317 [(set_attr "type" "veclogical,mfvsr,mfvsr,vecperm")
3318 (set_attr "isa" "*,*,p8v,p9v")])
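;; Usage sketch (assuming the altivec.h vec_extract binding); as the
;; code above shows, extracting the element that already sits in the
;; scalar position of the register needs no instruction at all:
;;
;;   #include <altivec.h>
;;
;;   double first (vector double v)
;;   {
;;     return vec_extract (v, 0);
;;   }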
3320 ;; Optimize extracting a single scalar element from memory.
3321 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3322 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3323 (vec_select:<VSX_D:VS_scalar>
3324 (match_operand:VSX_D 1 "memory_operand" "m,m")
3325 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3326 (clobber (match_scratch:P 3 "=&b,&b"))]
3327 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3329 "&& reload_completed"
3330 [(set (match_dup 0) (match_dup 4))]
3332 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3333 operands[3], <VSX_D:VS_scalar>mode);
3335 [(set_attr "type" "fpload,load")
3336 (set_attr "length" "8")])
;; Optimize storing a single scalar element that is already in the right
;; location to store
3340 (define_insn "*vsx_extract_<mode>_store"
3341 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3342 (vec_select:<VS_scalar>
3343 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3344 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3345 "VECTOR_MEM_VSX_P (<MODE>mode)"
3350 [(set_attr "type" "fpstore")
3351 (set_attr "isa" "*,p7v,p9v")])
3353 ;; Variable V2DI/V2DF extract shift
3354 (define_insn "vsx_vslo_<mode>"
3355 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3356 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3357 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3359 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3361 [(set_attr "type" "vecperm")])
3363 ;; Variable V2DI/V2DF extract from a register
3364 (define_insn_and_split "vsx_extract_<mode>_var"
3365 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3366 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3367 (match_operand:DI 2 "gpc_reg_operand" "r")]
3368 UNSPEC_VSX_EXTRACT))
3369 (clobber (match_scratch:DI 3 "=r"))
3370 (clobber (match_scratch:V2DI 4 "=&v"))]
3371 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3373 "&& reload_completed"
3376 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3377 operands[3], operands[4]);
3381 ;; Variable V2DI/V2DF extract from memory
3382 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3383 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3384 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3385 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3386 UNSPEC_VSX_EXTRACT))
3387 (clobber (match_scratch:DI 3 "=&b,&b"))]
3388 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3390 "&& reload_completed"
3391 [(set (match_dup 0) (match_dup 4))]
3393 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3394 operands[3], <VS_scalar>mode);
3396 [(set_attr "type" "fpload,load")])
3398 ;; Extract a SF element from V4SF
3399 (define_insn_and_split "vsx_extract_v4sf"
3400 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3402 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3403 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3404 (clobber (match_scratch:V4SF 3 "=0"))]
3405 "VECTOR_UNIT_VSX_P (V4SFmode)"
3410 rtx op0 = operands[0];
3411 rtx op1 = operands[1];
3412 rtx op2 = operands[2];
3413 rtx op3 = operands[3];
3415 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3421 if (GET_CODE (op3) == SCRATCH)
3422 op3 = gen_reg_rtx (V4SFmode);
3423 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3426 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3429 [(set_attr "length" "8")
3430 (set_attr "type" "fp")])
3432 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3433 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3435 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3436 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3437 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3438 "VECTOR_MEM_VSX_P (V4SFmode)"
3440 "&& reload_completed"
3441 [(set (match_dup 0) (match_dup 4))]
3443 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3444 operands[3], SFmode);
3446 [(set_attr "type" "fpload,fpload,fpload,load")
3447 (set_attr "length" "8")
3448 (set_attr "isa" "*,p7v,p9v,*")])
3450 ;; Variable V4SF extract from a register
3451 (define_insn_and_split "vsx_extract_v4sf_var"
3452 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3453 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3454 (match_operand:DI 2 "gpc_reg_operand" "r")]
3455 UNSPEC_VSX_EXTRACT))
3456 (clobber (match_scratch:DI 3 "=r"))
3457 (clobber (match_scratch:V2DI 4 "=&v"))]
3458 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3460 "&& reload_completed"
3463 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3464 operands[3], operands[4]);
3468 ;; Variable V4SF extract from memory
3469 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3470 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3471 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3472 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3473 UNSPEC_VSX_EXTRACT))
3474 (clobber (match_scratch:DI 3 "=&b,&b"))]
3475 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3477 "&& reload_completed"
3478 [(set (match_dup 0) (match_dup 4))]
3480 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3481 operands[3], SFmode);
3483 [(set_attr "type" "fpload,load")])
3485 ;; Expand the builtin form of xxpermdi to canonical rtl.
3486 (define_expand "vsx_xxpermdi_<mode>"
3487 [(match_operand:VSX_L 0 "vsx_register_operand")
3488 (match_operand:VSX_L 1 "vsx_register_operand")
3489 (match_operand:VSX_L 2 "vsx_register_operand")
3490 (match_operand:QI 3 "u5bit_cint_operand")]
3491 "VECTOR_MEM_VSX_P (<MODE>mode)"
3493 rtx target = operands[0];
3494 rtx op0 = operands[1];
3495 rtx op1 = operands[2];
3496 int mask = INTVAL (operands[3]);
3497 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3498 rtx perm1 = GEN_INT ((mask & 1) + 2);
3499 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3501 if (<MODE>mode == V2DFmode)
3502 gen = gen_vsx_xxpermdi2_v2df_1;
3505 gen = gen_vsx_xxpermdi2_v2di_1;
3506 if (<MODE>mode != V2DImode)
3508 target = gen_lowpart (V2DImode, target);
3509 op0 = gen_lowpart (V2DImode, op0);
3510 op1 = gen_lowpart (V2DImode, op1);
3513 emit_insn (gen (target, op0, op1, perm0, perm1));
3517 ;; Special version of xxpermdi that retains big-endian semantics.
3518 (define_expand "vsx_xxpermdi_<mode>_be"
3519 [(match_operand:VSX_L 0 "vsx_register_operand")
3520 (match_operand:VSX_L 1 "vsx_register_operand")
3521 (match_operand:VSX_L 2 "vsx_register_operand")
3522 (match_operand:QI 3 "u5bit_cint_operand")]
3523 "VECTOR_MEM_VSX_P (<MODE>mode)"
3525 rtx target = operands[0];
3526 rtx op0 = operands[1];
3527 rtx op1 = operands[2];
3528 int mask = INTVAL (operands[3]);
3529 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3530 rtx perm1 = GEN_INT ((mask & 1) + 2);
3531 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3533 if (<MODE>mode == V2DFmode)
3534 gen = gen_vsx_xxpermdi2_v2df_1;
3537 gen = gen_vsx_xxpermdi2_v2di_1;
3538 if (<MODE>mode != V2DImode)
3540 target = gen_lowpart (V2DImode, target);
3541 op0 = gen_lowpart (V2DImode, op0);
3542 op1 = gen_lowpart (V2DImode, op1);
3545 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3546 transformation we don't want; it is necessary for
3547 rs6000_expand_vec_perm_const_1 but not for this use. So we
3548 prepare for that by reversing the transformation here. */
3549 if (BYTES_BIG_ENDIAN)
3550 emit_insn (gen (target, op0, op1, perm0, perm1));
3553 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3554 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3555 emit_insn (gen (target, op1, op0, p0, p1));
3560 (define_insn "vsx_xxpermdi2_<mode>_1"
3561 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3563 (vec_concat:<VS_double>
3564 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3565 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3566 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3567 (match_operand 4 "const_2_to_3_operand" "")])))]
3568 "VECTOR_MEM_VSX_P (<MODE>mode)"
3572 /* For little endian, swap operands and invert/swap selectors
3573 to get the correct xxpermdi. The operand swap sets up the
3574 inputs as a little endian array. The selectors are swapped
3575 because they are defined to use big endian ordering. The
3576 selectors are inverted to get the correct doublewords for
3577 little endian ordering. */
3578 if (BYTES_BIG_ENDIAN)
3580 op3 = INTVAL (operands[3]);
3581 op4 = INTVAL (operands[4]);
3585 op3 = 3 - INTVAL (operands[4]);
3586 op4 = 3 - INTVAL (operands[3]);
3589 mask = (op3 << 1) | (op4 - 2);
3590 operands[3] = GEN_INT (mask);
3592 if (BYTES_BIG_ENDIAN)
3593 return "xxpermdi %x0,%x1,%x2,%3";
3595 return "xxpermdi %x0,%x2,%x1,%3";
3597 [(set_attr "type" "vecperm")])
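;; Worked example of the selector encoding above: for big endian with
;; operands[3] = 1 and operands[4] = 2,
;;
;;   mask = (1 << 1) | (2 - 2) = 2
;;
;; so "xxpermdi %x0,%x1,%x2,2" selects doubleword 1 of operand 1 and
;; doubleword 0 of operand 2.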
3599 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3600 ;; none of the small types were allowed in a vector register, so we had to
3601 ;; extract to a DImode and either do a direct move or store.
3602 (define_expand "vsx_extract_<mode>"
3603 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3604 (vec_select:<VS_scalar>
3605 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3606 (parallel [(match_operand:QI 2 "const_int_operand")])))
3607 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3608 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
/* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3611 if (TARGET_P9_VECTOR)
3613 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3619 (define_insn "vsx_extract_<mode>_p9"
3620 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3621 (vec_select:<VS_scalar>
3622 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3623 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3624 (clobber (match_scratch:SI 3 "=r,X"))]
3625 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3627 if (which_alternative == 0)
3632 HOST_WIDE_INT elt = INTVAL (operands[2]);
3633 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3634 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3637 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3638 HOST_WIDE_INT offset = unit_size * elt_adj;
3640 operands[2] = GEN_INT (offset);
3642 return "xxextractuw %x0,%x1,%2";
3644 return "vextractu<wd> %0,%1,%2";
3647 [(set_attr "type" "vecsimple")
3648 (set_attr "isa" "p9v,*")])
3651 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3652 (vec_select:<VS_scalar>
3653 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3654 (parallel [(match_operand:QI 2 "const_int_operand")])))
3655 (clobber (match_operand:SI 3 "int_reg_operand"))]
3656 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3659 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3660 rtx op1 = operands[1];
3661 rtx op2 = operands[2];
3662 rtx op3 = operands[3];
3663 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3665 emit_move_insn (op3, GEN_INT (offset));
3666 if (BYTES_BIG_ENDIAN)
3667 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3669 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3673 ;; Optimize zero extracts to eliminate the AND after the extract.
3674 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3675 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3677 (vec_select:<VS_scalar>
3678 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3679 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3680 (clobber (match_scratch:SI 3 "=r,X"))]
3681 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3683 "&& reload_completed"
3684 [(parallel [(set (match_dup 4)
3685 (vec_select:<VS_scalar>
3687 (parallel [(match_dup 2)])))
3688 (clobber (match_dup 3))])]
3690 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3692 [(set_attr "isa" "p9v,*")])
3694 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3695 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3696 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3697 (vec_select:<VS_scalar>
3698 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3699 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3700 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3701 (clobber (match_scratch:SI 4 "=X,&r"))]
3702 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3704 "&& reload_completed"
3705 [(parallel [(set (match_dup 3)
3706 (vec_select:<VS_scalar>
3708 (parallel [(match_dup 2)])))
3709 (clobber (match_dup 4))])
3713 (define_insn_and_split "*vsx_extract_si"
3714 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3716 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3717 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3718 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3719 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3721 "&& reload_completed"
3724 rtx dest = operands[0];
3725 rtx src = operands[1];
3726 rtx element = operands[2];
3727 rtx vec_tmp = operands[3];
3730 if (!BYTES_BIG_ENDIAN)
3731 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
/* If the value is in the correct position, we can avoid doing the VSPLT<x>
instruction.  */
3735 value = INTVAL (element);
3737 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3741 if (MEM_P (operands[0]))
3743 if (can_create_pseudo_p ())
3744 dest = rs6000_force_indexed_or_indirect_mem (dest);
3746 if (TARGET_P8_VECTOR)
3747 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3749 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3752 else if (TARGET_P8_VECTOR)
3753 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3755 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3756 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3760 [(set_attr "type" "mfvsr,vecperm,fpstore")
3761 (set_attr "length" "8")
3762 (set_attr "isa" "*,p8v,*")])
3764 (define_insn_and_split "*vsx_extract_<mode>_p8"
3765 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3766 (vec_select:<VS_scalar>
3767 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3768 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3769 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3770 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3771 && !TARGET_P9_VECTOR"
3773 "&& reload_completed"
3776 rtx dest = operands[0];
3777 rtx src = operands[1];
3778 rtx element = operands[2];
3779 rtx vec_tmp = operands[3];
3782 if (!BYTES_BIG_ENDIAN)
3783 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
/* If the value is in the correct position, we can avoid doing the VSPLT<x>
instruction.  */
3787 value = INTVAL (element);
3788 if (<MODE>mode == V16QImode)
3791 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3795 else if (<MODE>mode == V8HImode)
3798 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3805 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3806 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3809 [(set_attr "type" "mfvsr")])
3811 ;; Optimize extracting a single scalar element from memory.
3812 (define_insn_and_split "*vsx_extract_<mode>_load"
3813 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3814 (vec_select:<VS_scalar>
3815 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3816 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3817 (clobber (match_scratch:DI 3 "=&b"))]
3818 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3820 "&& reload_completed"
3821 [(set (match_dup 0) (match_dup 4))]
3823 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3824 operands[3], <VS_scalar>mode);
3826 [(set_attr "type" "load")
3827 (set_attr "length" "8")])
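
;; For example, an extract whose source is in memory; the splitter above
;; turns it into a single scalar load (a sketch; the final address depends
;; on the element number and endianness):
;;
;;   #include <altivec.h>
;;   short load_elem3 (vector short *p) { return vec_extract (*p, 3); }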
3829 ;; Variable V16QI/V8HI/V4SI extract from a register
3830 (define_insn_and_split "vsx_extract_<mode>_var"
3831 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3833 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3834 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3835 UNSPEC_VSX_EXTRACT))
3836 (clobber (match_scratch:DI 3 "=r,r"))
3837 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3838 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3840 "&& reload_completed"
3843 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3844 operands[3], operands[4]);
3847 [(set_attr "isa" "p9v,*")])
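
;; A variable-index extract such as this sketch maps onto the pattern
;; above; the index need not be a compile-time constant:
;;
;;   #include <altivec.h>
;;   unsigned char extract_var (vector unsigned char v, long n)
;;   { return vec_extract (v, n); }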
3849 ;; Variable V16QI/V8HI/V4SI extract from memory
3850 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3851 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3853 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3854 (match_operand:DI 2 "gpc_reg_operand" "r")]
3855 UNSPEC_VSX_EXTRACT))
3856 (clobber (match_scratch:DI 3 "=&b"))]
3857 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3859 "&& reload_completed"
3860 [(set (match_dup 0) (match_dup 4))]
3862 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3863 operands[3], <VS_scalar>mode);
3865 [(set_attr "type" "load")])
3868 (define_expand "vextractl<mode>"
3869 [(set (match_operand:V2DI 0 "altivec_register_operand")
3870 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3871 (match_operand:VI2 2 "altivec_register_operand")
3872 (match_operand:SI 3 "register_operand")]
3876 if (BYTES_BIG_ENDIAN)
3878 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
3879 operands[2], operands[3]));
3880 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3883 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
3884 operands[1], operands[3]));
3888 (define_insn "vextractl<mode>_internal"
3889 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3890 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3891 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3892 (match_operand:SI 3 "register_operand" "r")]
3895 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
3896 [(set_attr "type" "vecsimple")])
3898 (define_expand "vextractr<mode>"
3899 [(set (match_operand:V2DI 0 "altivec_register_operand")
3900 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3901 (match_operand:VI2 2 "altivec_register_operand")
3902 (match_operand:SI 3 "register_operand")]
3906 if (BYTES_BIG_ENDIAN)
3908 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
3909 operands[2], operands[3]));
3910 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3913 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
3914 operands[1], operands[3]));
3918 (define_insn "vextractr<mode>_internal"
3919 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3920 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3921 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3922 (match_operand:SI 3 "register_operand" "r")]
3925 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
3926 [(set_attr "type" "vecsimple")])
3928 (define_expand "vinsertvl_<mode>"
3929 [(set (match_operand:VI2 0 "altivec_register_operand")
3930 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
3931 (match_operand:VI2 2 "altivec_register_operand")
3932 (match_operand:SI 3 "register_operand" "r")]
3936 if (BYTES_BIG_ENDIAN)
3937 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
3938 operands[1], operands[2]));
3940 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
3941 operands[1], operands[2]));
3945 (define_insn "vinsertvl_internal_<mode>"
3946 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
3947 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
3948 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3949 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
3952 "vins<wd>vlx %0,%1,%2"
3953 [(set_attr "type" "vecsimple")])
3955 (define_expand "vinsertvr_<mode>"
3956 [(set (match_operand:VI2 0 "altivec_register_operand")
3957 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
3958 (match_operand:VI2 2 "altivec_register_operand")
3959 (match_operand:SI 3 "register_operand" "r")]
3963 if (BYTES_BIG_ENDIAN)
3964 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
3965 operands[1], operands[2]));
3967 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
3968 operands[1], operands[2]));
3972 (define_insn "vinsertvr_internal_<mode>"
3973 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
3974 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
3975 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3976 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
3979 "vins<wd>vrx %0,%1,%2"
3980 [(set_attr "type" "vecsimple")])
3982 (define_expand "vinsertgl_<mode>"
3983 [(set (match_operand:VI2 0 "altivec_register_operand")
3984 (unspec:VI2 [(match_operand:SI 1 "register_operand")
3985 (match_operand:VI2 2 "altivec_register_operand")
3986 (match_operand:SI 3 "register_operand")]
3990 if (BYTES_BIG_ENDIAN)
3991 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
3992 operands[1], operands[2]));
3994 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
3995 operands[1], operands[2]));
3999 (define_insn "vinsertgl_internal_<mode>"
4000 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4001 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4002 (match_operand:SI 2 "register_operand" "r")
4003 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4006 "vins<wd>lx %0,%1,%2"
4007 [(set_attr "type" "vecsimple")])
4009 (define_expand "vinsertgr_<mode>"
4010 [(set (match_operand:VI2 0 "altivec_register_operand")
4011 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4012 (match_operand:VI2 2 "altivec_register_operand")
4013 (match_operand:SI 3 "register_operand")]
4017 if (BYTES_BIG_ENDIAN)
4018 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4019 operands[1], operands[2]));
4021 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4022 operands[1], operands[2]));
4026 (define_insn "vinsertgr_internal_<mode>"
4027 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4028 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4029 (match_operand:SI 2 "register_operand" "r")
4030 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4033 "vins<wd>rx %0,%1,%2"
4034 [(set_attr "type" "vecsimple")])
4036 (define_expand "vreplace_elt_<mode>"
4037 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4038 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4039 (match_operand:<VS_scalar> 2 "register_operand")
4040 (match_operand:QI 3 "const_0_to_3_operand")]
4041 UNSPEC_REPLACE_ELT))]
4045 /* Immediate value is the word index; convert it to a byte index and
4046 adjust for endianness if needed. */
4047 if (BYTES_BIG_ENDIAN)
4048 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4051 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4053 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4058 [(set_attr "type" "vecsimple")])
4060 (define_expand "vreplace_un_<mode>"
4061 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4062 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4063 (match_operand:<VS_scalar> 2 "register_operand")
4064 (match_operand:QI 3 "const_0_to_12_operand")]
4065 UNSPEC_REPLACE_UN))]
4068 /* Immediate value is the byte index in big-endian numbering. */
4069 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4070 operands[2], operands[3]));
4073 [(set_attr "type" "vecsimple")])
4075 (define_insn "vreplace_elt_<mode>_inst"
4076 [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
4077 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4078 (match_operand:<VS_scalar> 2 "register_operand" "r")
4079 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4080 UNSPEC_REPLACE_ELT))]
4082 "vins<REPLACE_ELT_char> %0,%2,%3"
4083 [(set_attr "type" "vecsimple")])
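
;; The vreplace expanders above back the vec_replace_elt and
;; vec_replace_unaligned builtins; a sketch of a use (assuming an
;; ISA 3.1 style target):
;;
;;   #include <altivec.h>
;;   vector unsigned int replace_word2 (vector unsigned int v, unsigned int x)
;;   { return vec_replace_elt (v, x, 2); }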
4085 ;; VSX_EXTRACT optimizations
4086 ;; Optimize double d = (double) vec_extract (vi, <n>)
4087 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
4088 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4089 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4092 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4093 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4094 (clobber (match_scratch:V4SI 3 "=v"))]
4095 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4100 rtx dest = operands[0];
4101 rtx src = operands[1];
4102 rtx element = operands[2];
4103 rtx v4si_tmp = operands[3];
4106 if (!BYTES_BIG_ENDIAN)
4107 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4109 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4110 instruction. */
4111 value = INTVAL (element);
4114 if (GET_CODE (v4si_tmp) == SCRATCH)
4115 v4si_tmp = gen_reg_rtx (V4SImode);
4116 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4121 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4125 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4126 ;; where <type> is a floating point type supported by the hardware that is
4127 ;; not double. First convert the value to double, and then to the desired
4129 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4130 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4131 (any_float:VSX_EXTRACT_FL
4133 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4134 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4135 (clobber (match_scratch:V4SI 3 "=v"))
4136 (clobber (match_scratch:DF 4 "=wa"))]
4137 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4142 rtx dest = operands[0];
4143 rtx src = operands[1];
4144 rtx element = operands[2];
4145 rtx v4si_tmp = operands[3];
4146 rtx df_tmp = operands[4];
4149 if (!BYTES_BIG_ENDIAN)
4150 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4152 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4153 instruction. */
4154 value = INTVAL (element);
4157 if (GET_CODE (v4si_tmp) == SCRATCH)
4158 v4si_tmp = gen_reg_rtx (V4SImode);
4159 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4164 if (GET_CODE (df_tmp) == SCRATCH)
4165 df_tmp = gen_reg_rtx (DFmode);
4167 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4169 if (<MODE>mode == SFmode)
4170 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4171 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4172 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4173 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4174 && TARGET_FLOAT128_HW)
4175 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4176 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4177 emit_insn (gen_extenddfif2 (dest, df_tmp));
4178 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4179 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4186 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
4187 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4188 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4189 ;; vector short or vector unsigned short.
4190 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
4191 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4193 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4194 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4195 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4196 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4197 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4198 && TARGET_P9_VECTOR"
4200 "&& reload_completed"
4201 [(parallel [(set (match_dup 3)
4202 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4204 (parallel [(match_dup 2)])))
4205 (clobber (scratch:SI))])
4207 (sign_extend:DI (match_dup 3)))
4209 (float:<FL_CONV:MODE> (match_dup 4)))]
4211 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4213 [(set_attr "isa" "<FL_CONV:VSisa>")])
4215 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
4216 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4217 (unsigned_float:FL_CONV
4218 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4219 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4220 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4221 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4222 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4223 && TARGET_P9_VECTOR"
4225 "&& reload_completed"
4226 [(parallel [(set (match_dup 3)
4227 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4229 (parallel [(match_dup 2)])))
4230 (clobber (scratch:SI))])
4232 (float:<FL_CONV:MODE> (match_dup 4)))]
4234 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4236 [(set_attr "isa" "<FL_CONV:VSisa>")])
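
;; Source along these lines exercises the two patterns above, fusing the
;; extract, the extension, and the integer-to-float conversion (a sketch,
;; assuming a power9-style target):
;;
;;   #include <altivec.h>
;;   float convert_elem5 (vector signed char v)
;;   { return (float) vec_extract (v, 5); }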
4238 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4239 (define_insn "vsx_set_<mode>_p9"
4240 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4241 (unspec:VSX_EXTRACT_I
4242 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4243 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
4244 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4246 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4248 int ele = INTVAL (operands[3]);
4249 int nunits = GET_MODE_NUNITS (<MODE>mode);
4251 if (!BYTES_BIG_ENDIAN)
4252 ele = nunits - 1 - ele;
4254 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
4255 if (<MODE>mode == V4SImode)
4256 return "xxinsertw %x0,%x2,%3";
4258 return "vinsert<wd> %0,%2,%3";
4260 [(set_attr "type" "vecperm")])
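
;; For illustration, a constant-position vec_insert such as the sketch
;; below can use this single xxinsertw/vinsert<wd> form:
;;
;;   #include <altivec.h>
;;   vector int insert_elem3 (vector int v, int x)
;;   { return vec_insert (x, v, 3); }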
4262 (define_insn_and_split "vsx_set_v4sf_p9"
4263 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4265 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4266 (match_operand:SF 2 "gpc_reg_operand" "wa")
4267 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4269 (clobber (match_scratch:SI 4 "=&wa"))]
4270 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4272 "&& reload_completed"
4274 (unspec:V4SF [(match_dup 2)]
4275 UNSPEC_VSX_CVDPSPN))
4276 (parallel [(set (match_dup 4)
4277 (vec_select:SI (match_dup 6)
4278 (parallel [(match_dup 7)])))
4279 (clobber (scratch:SI))])
4281 (unspec:V4SI [(match_dup 8)
4286 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4288 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4289 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4290 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4291 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4293 [(set_attr "type" "vecperm")
4294 (set_attr "length" "12")
4295 (set_attr "isa" "p9v")])
4297 ;; Special case setting 0.0f to a V4SF element
4298 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4299 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4301 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4302 (match_operand:SF 2 "zero_fp_constant" "j")
4303 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4305 (clobber (match_scratch:SI 4 "=&wa"))]
4306 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4308 "&& reload_completed"
4312 (unspec:V4SI [(match_dup 5)
4317 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4319 [(set_attr "type" "vecperm")
4320 (set_attr "length" "8")
4321 (set_attr "isa" "p9v")])
4323 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4324 ;; that is in the default scalar position (1 for big endian, 2 for little
4325 ;; endian). We just need to do an xxinsertw since the element is in the
4326 ;; correct location.
4328 (define_insn "*vsx_insert_extract_v4sf_p9"
4329 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4331 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4332 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4334 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4335 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4337 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4338 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4340 int ele = INTVAL (operands[4]);
4342 if (!BYTES_BIG_ENDIAN)
4343 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4345 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4346 return "xxinsertw %x0,%x2,%4";
4348 [(set_attr "type" "vecperm")])
4350 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4351 ;; that is in the default scalar position (1 for big endian, 2 for little
4352 ;; endian). Doing the insert/extract in SImode avoids the float conversion.
4354 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4355 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4357 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4358 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4360 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4361 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4363 (clobber (match_scratch:SI 5 "=&wa"))]
4364 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4365 && TARGET_P9_VECTOR && TARGET_POWERPC64
4366 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4369 [(parallel [(set (match_dup 5)
4370 (vec_select:SI (match_dup 6)
4371 (parallel [(match_dup 3)])))
4372 (clobber (scratch:SI))])
4374 (unspec:V4SI [(match_dup 8)
4379 if (GET_CODE (operands[5]) == SCRATCH)
4380 operands[5] = gen_reg_rtx (SImode);
4382 operands[6] = gen_lowpart (V4SImode, operands[2]);
4383 operands[7] = gen_lowpart (V4SImode, operands[0]);
4384 operands[8] = gen_lowpart (V4SImode, operands[1]);
4386 [(set_attr "type" "vecperm")
4387 (set_attr "isa" "p9v")])
4389 ;; Expanders for builtins
4390 (define_expand "vsx_mergel_<mode>"
4391 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4392 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4393 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4394 "VECTOR_MEM_VSX_P (<MODE>mode)"
4396 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4397 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4398 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4399 emit_insn (gen_rtx_SET (operands[0], x));
4403 (define_expand "vsx_mergeh_<mode>"
4404 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4405 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4406 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4407 "VECTOR_MEM_VSX_P (<MODE>mode)"
4409 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4410 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4411 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4412 emit_insn (gen_rtx_SET (operands[0], x));
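
;; The two expanders above implement the vec_mergeh and vec_mergel
;; builtins for the 64-bit element types; a sketch of a use:
;;
;;   #include <altivec.h>
;;   vector double merge_high (vector double a, vector double b)
;;   { return vec_mergeh (a, b); }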
4417 ;; We separate the register splat insn from the memory splat insn to force the
4418 ;; register allocator to generate the indexed form of the SPLAT when it is
4419 ;; given an offsettable memory reference. Otherwise, if the register and
4420 ;; memory insns were combined into a single insn, the register allocator would
4421 ;; load the value into a register and then do a double-word permute.
4422 (define_expand "vsx_splat_<mode>"
4423 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4424 (vec_duplicate:VSX_D
4425 (match_operand:<VS_scalar> 1 "input_operand")))]
4426 "VECTOR_MEM_VSX_P (<MODE>mode)"
4428 rtx op1 = operands[1];
4430 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4431 else if (!REG_P (op1))
4432 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4435 (define_insn "vsx_splat_<mode>_reg"
4436 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4437 (vec_duplicate:VSX_D
4438 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4439 "VECTOR_MEM_VSX_P (<MODE>mode)"
4441 xxpermdi %x0,%x1,%x1,0
4443 [(set_attr "type" "vecperm,vecmove")])
4445 (define_insn "vsx_splat_<mode>_mem"
4446 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4447 (vec_duplicate:VSX_D
4448 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4449 "VECTOR_MEM_VSX_P (<MODE>mode)"
4451 [(set_attr "type" "vecload")])
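
;; A splat from a scalar, as in the sketch below, goes through the
;; expander above and then picks the register or memory form:
;;
;;   #include <altivec.h>
;;   vector double splat_double (double d) { return vec_splats (d); }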
4453 ;; V4SI splat support
4454 (define_insn "vsx_splat_v4si"
4455 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4457 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4462 [(set_attr "type" "vecperm,vecload")])
4464 ;; SImode is not currently allowed in vector registers. This pattern
4465 ;; allows us to use direct move to get the value in a vector register
4466 ;; so that we can use XXSPLTW.
4467 (define_insn "vsx_splat_v4si_di"
4468 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4471 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4472 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4476 [(set_attr "type" "vecperm")
4477 (set_attr "isa" "p8v,*")])
4479 ;; V4SF splat (ISA 3.0)
4480 (define_insn_and_split "vsx_splat_v4sf"
4481 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4483 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4489 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4491 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4493 (unspec:V4SF [(match_dup 0)
4494 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4496 [(set_attr "type" "vecload,vecperm,vecperm")
4497 (set_attr "length" "*,8,*")
4498 (set_attr "isa" "*,p8v,*")])
4500 ;; V4SF/V4SI splat from a vector element
4501 (define_insn "vsx_xxspltw_<mode>"
4502 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4503 (vec_duplicate:VSX_W
4504 (vec_select:<VS_scalar>
4505 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4507 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4508 "VECTOR_MEM_VSX_P (<MODE>mode)"
4510 if (!BYTES_BIG_ENDIAN)
4511 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4513 return "xxspltw %x0,%x1,%2";
4515 [(set_attr "type" "vecperm")])
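
;; The element splat above corresponds to the vec_splat builtin, as in
;; this sketch:
;;
;;   #include <altivec.h>
;;   vector float splat_elem2 (vector float v) { return vec_splat (v, 2); }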
4517 (define_insn "vsx_xxspltw_<mode>_direct"
4518 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4519 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4520 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4521 UNSPEC_VSX_XXSPLTW))]
4522 "VECTOR_MEM_VSX_P (<MODE>mode)"
4523 "xxspltw %x0,%x1,%2"
4524 [(set_attr "type" "vecperm")])
4526 ;; V16QI/V8HI splat support on ISA 2.07
4527 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4528 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4529 (vec_duplicate:VSX_SPLAT_I
4530 (truncate:<VS_scalar>
4531 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4532 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4533 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4534 [(set_attr "type" "vecperm")])
4536 ;; V2DF/V2DI splat for use by vec_splat builtin
4537 (define_insn "vsx_xxspltd_<mode>"
4538 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4539 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4540 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4541 UNSPEC_VSX_XXSPLTD))]
4542 "VECTOR_MEM_VSX_P (<MODE>mode)"
4544 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4545 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4546 return "xxpermdi %x0,%x1,%x1,0";
4548 return "xxpermdi %x0,%x1,%x1,3";
4550 [(set_attr "type" "vecperm")])
4552 ;; V4SF/V4SI interleave
4553 (define_insn "vsx_xxmrghw_<mode>"
4554 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4556 (vec_concat:<VS_double>
4557 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4558 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4559 (parallel [(const_int 0) (const_int 4)
4560 (const_int 1) (const_int 5)])))]
4561 "VECTOR_MEM_VSX_P (<MODE>mode)"
4563 if (BYTES_BIG_ENDIAN)
4564 return "xxmrghw %x0,%x1,%x2";
4566 return "xxmrglw %x0,%x2,%x1";
4568 [(set_attr "type" "vecperm")])
4570 (define_insn "vsx_xxmrglw_<mode>"
4571 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4573 (vec_concat:<VS_double>
4574 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4575 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4576 (parallel [(const_int 2) (const_int 6)
4577 (const_int 3) (const_int 7)])))]
4578 "VECTOR_MEM_VSX_P (<MODE>mode)"
4580 if (BYTES_BIG_ENDIAN)
4581 return "xxmrglw %x0,%x1,%x2";
4583 return "xxmrghw %x0,%x2,%x1";
4585 [(set_attr "type" "vecperm")])
4587 ;; Shift left double by word immediate
4588 (define_insn "vsx_xxsldwi_<mode>"
4589 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4590 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4591 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4592 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4594 "VECTOR_MEM_VSX_P (<MODE>mode)"
4595 "xxsldwi %x0,%x1,%x2,%3"
4596 [(set_attr "type" "vecperm")
4597 (set_attr "isa" "<VSisa>")])
4600 ;; Vector reduction insns and splitters
4602 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4603 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4607 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4608 (parallel [(const_int 1)]))
4611 (parallel [(const_int 0)])))
4613 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4614 "VECTOR_UNIT_VSX_P (V2DFmode)"
4619 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4620 ? gen_reg_rtx (V2DFmode)
4622 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4623 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4626 [(set_attr "length" "8")
4627 (set_attr "type" "veccomplex")])
4629 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4630 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4632 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4633 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4634 (clobber (match_scratch:V4SF 2 "=&wa"))
4635 (clobber (match_scratch:V4SF 3 "=&wa"))]
4636 "VECTOR_UNIT_VSX_P (V4SFmode)"
4641 rtx op0 = operands[0];
4642 rtx op1 = operands[1];
4643 rtx tmp2, tmp3, tmp4;
4645 if (can_create_pseudo_p ())
4647 tmp2 = gen_reg_rtx (V4SFmode);
4648 tmp3 = gen_reg_rtx (V4SFmode);
4649 tmp4 = gen_reg_rtx (V4SFmode);
4658 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4659 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4660 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4661 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4664 [(set_attr "length" "16")
4665 (set_attr "type" "veccomplex")])
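
;; In pseudo-code, the V4SF reduction above performs the following, where
;; "op" is the reduction operation and xxsldwi with identical inputs acts
;; as a rotate by words:
;;   t2 = xxsldwi (v, v, 2)      ; rotate by two words
;;   t3 = t2 op v                ; combine opposite pairs
;;   t4 = xxsldwi (t3, t3, 3)    ; rotate by one word
;;   r  = t4 op t3               ; every element now holds the reduction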
4667 ;; Combiner patterns with the vector reduction patterns that know we can get
4668 ;; to the top element of the V2DF array without doing an extract.
4670 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4671 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4676 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4677 (parallel [(const_int 1)]))
4680 (parallel [(const_int 0)])))
4682 (parallel [(const_int 1)])))
4683 (clobber (match_scratch:DF 2 "=0,&wa"))]
4684 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4689 rtx hi = gen_highpart (DFmode, operands[1]);
4690 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4691 ? gen_reg_rtx (DFmode)
4694 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4695 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4698 [(set_attr "length" "8")
4699 (set_attr "type" "veccomplex")])
4701 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4702 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4705 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4706 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4707 (parallel [(const_int 3)])))
4708 (clobber (match_scratch:V4SF 2 "=&wa"))
4709 (clobber (match_scratch:V4SF 3 "=&wa"))
4710 (clobber (match_scratch:V4SF 4 "=0"))]
4711 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4716 rtx op0 = operands[0];
4717 rtx op1 = operands[1];
4718 rtx tmp2, tmp3, tmp4, tmp5;
4720 if (can_create_pseudo_p ())
4722 tmp2 = gen_reg_rtx (V4SFmode);
4723 tmp3 = gen_reg_rtx (V4SFmode);
4724 tmp4 = gen_reg_rtx (V4SFmode);
4725 tmp5 = gen_reg_rtx (V4SFmode);
4735 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4736 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4737 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4738 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4739 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4742 [(set_attr "length" "20")
4743 (set_attr "type" "veccomplex")])
4746 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4748 [(set (match_operand:P 0 "base_reg_operand")
4749 (match_operand:P 1 "short_cint_operand"))
4750 (set (match_operand:VSX_M 2 "vsx_register_operand")
4751 (mem:VSX_M (plus:P (match_dup 0)
4752 (match_operand:P 3 "int_reg_operand"))))]
4753 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4754 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4755 [(set_attr "length" "8")
4756 (set_attr "type" "vecload")])
4759 [(set (match_operand:P 0 "base_reg_operand")
4760 (match_operand:P 1 "short_cint_operand"))
4761 (set (match_operand:VSX_M 2 "vsx_register_operand")
4762 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4764 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4765 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4766 [(set_attr "length" "8")
4767 (set_attr "type" "vecload")])
4770 ;; ISA 3.0 vector extend sign support
4772 (define_insn "vsx_sign_extend_qi_<mode>"
4773 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4775 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4776 UNSPEC_VSX_SIGN_EXTEND))]
4779 [(set_attr "type" "vecexts")])
4781 (define_insn "vsx_sign_extend_hi_<mode>"
4782 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4784 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4785 UNSPEC_VSX_SIGN_EXTEND))]
4788 [(set_attr "type" "vecexts")])
4790 (define_insn "*vsx_sign_extend_si_v2di"
4791 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4792 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4793 UNSPEC_VSX_SIGN_EXTEND))]
4796 [(set_attr "type" "vecexts")])
4799 ;; ISA 3.0 Binary Floating-Point Support
4801 ;; VSX Scalar Extract Exponent Quad-Precision
4802 (define_insn "xsxexpqp_<mode>"
4803 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4804 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4805 UNSPEC_VSX_SXEXPDP))]
4808 [(set_attr "type" "vecmove")])
4810 ;; VSX Scalar Extract Exponent Double-Precision
4811 (define_insn "xsxexpdp"
4812 [(set (match_operand:DI 0 "register_operand" "=r")
4813 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4814 UNSPEC_VSX_SXEXPDP))]
4815 "TARGET_P9_VECTOR && TARGET_64BIT"
4817 [(set_attr "type" "integer")])
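
;; This pattern serves the scalar_extract_exp builtin; a sketch of a use
;; (the builtin returns the biased exponent field of the double):
;;
;;   #include <altivec.h>
;;   unsigned int get_exp (double d) { return scalar_extract_exp (d); }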
4819 ;; VSX Scalar Extract Significand Quad-Precision
4820 (define_insn "xsxsigqp_<mode>"
4821 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4822 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4826 [(set_attr "type" "vecmove")])
4828 ;; VSX Scalar Extract Significand Double-Precision
4829 (define_insn "xsxsigdp"
4830 [(set (match_operand:DI 0 "register_operand" "=r")
4831 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4833 "TARGET_P9_VECTOR && TARGET_64BIT"
4835 [(set_attr "type" "integer")])
4837 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4838 (define_insn "xsiexpqpf_<mode>"
4839 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4841 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4842 (match_operand:DI 2 "altivec_register_operand" "v")]
4843 UNSPEC_VSX_SIEXPQP))]
4846 [(set_attr "type" "vecmove")])
4848 ;; VSX Scalar Insert Exponent Quad-Precision
4849 (define_insn "xsiexpqp_<mode>"
4850 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4851 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4852 (match_operand:DI 2 "altivec_register_operand" "v")]
4853 UNSPEC_VSX_SIEXPQP))]
4856 [(set_attr "type" "vecmove")])
4858 ;; VSX Scalar Insert Exponent Double-Precision
4859 (define_insn "xsiexpdp"
4860 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4861 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4862 (match_operand:DI 2 "register_operand" "r")]
4863 UNSPEC_VSX_SIEXPDP))]
4864 "TARGET_P9_VECTOR && TARGET_64BIT"
4865 "xsiexpdp %x0,%1,%2"
4866 [(set_attr "type" "fpsimple")])
4868 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4869 (define_insn "xsiexpdpf"
4870 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4871 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4872 (match_operand:DI 2 "register_operand" "r")]
4873 UNSPEC_VSX_SIEXPDP))]
4874 "TARGET_P9_VECTOR && TARGET_64BIT"
4875 "xsiexpdp %x0,%1,%2"
4876 [(set_attr "type" "fpsimple")])
4878 ;; VSX Scalar Compare Exponents Double-Precision
4879 (define_expand "xscmpexpdp_<code>"
4883 [(match_operand:DF 1 "vsx_register_operand" "wa")
4884 (match_operand:DF 2 "vsx_register_operand" "wa")]
4885 UNSPEC_VSX_SCMPEXPDP)
4887 (set (match_operand:SI 0 "register_operand" "=r")
4888 (CMP_TEST:SI (match_dup 3)
4892 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4894 emit_move_insn (operands[0], const0_rtx);
4898 operands[3] = gen_reg_rtx (CCFPmode);
4901 (define_insn "*xscmpexpdp"
4902 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4904 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4905 (match_operand:DF 2 "vsx_register_operand" "wa")]
4906 UNSPEC_VSX_SCMPEXPDP)
4907 (match_operand:SI 3 "zero_constant" "j")))]
4909 "xscmpexpdp %0,%x1,%x2"
4910 [(set_attr "type" "fpcompare")])
4912 ;; VSX Scalar Compare Exponents Quad-Precision
4913 (define_expand "xscmpexpqp_<code>_<mode>"
4917 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4918 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4919 UNSPEC_VSX_SCMPEXPQP)
4921 (set (match_operand:SI 0 "register_operand" "=r")
4922 (CMP_TEST:SI (match_dup 3)
4926 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
4928 emit_move_insn (operands[0], const0_rtx);
4932 operands[3] = gen_reg_rtx (CCFPmode);
4935 (define_insn "*xscmpexpqp"
4936 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4938 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4939 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4940 UNSPEC_VSX_SCMPEXPQP)
4941 (match_operand:SI 3 "zero_constant" "j")))]
4943 "xscmpexpqp %0,%1,%2"
4944 [(set_attr "type" "fpcompare")])
4946 ;; VSX Scalar Test Data Class Quad-Precision
4947 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4948 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4949 ;; setting the eq bit if any of the conditions tested by operand 2
4950 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4951 (define_expand "xststdcqp_<mode>"
4955 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4956 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4959 (set (match_operand:SI 0 "register_operand" "=r")
4960 (eq:SI (match_dup 3)
4964 operands[3] = gen_reg_rtx (CCFPmode);
4967 ;; VSX Scalar Test Data Class Double- and Single-Precision
4968 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4969 ;; if any of the conditions tested by operand 2 are satisfied.
4970 ;; The gt and unordered bits are cleared to zero.)
4971 (define_expand "xststdc<sd>p"
4975 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4976 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4979 (set (match_operand:SI 0 "register_operand" "=r")
4980 (eq:SI (match_dup 3)
4984 operands[3] = gen_reg_rtx (CCFPmode);
4985 operands[4] = CONST0_RTX (SImode);
4988 ;; The VSX Scalar Test Negative Quad-Precision
4989 (define_expand "xststdcnegqp_<mode>"
4993 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4997 (set (match_operand:SI 0 "register_operand" "=r")
4998 (lt:SI (match_dup 2)
5002 operands[2] = gen_reg_rtx (CCFPmode);
5005 ;; The VSX Scalar Test Negative Double- and Single-Precision
5006 (define_expand "xststdcneg<sd>p"
5010 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5014 (set (match_operand:SI 0 "register_operand" "=r")
5015 (lt:SI (match_dup 2)
5019 operands[2] = gen_reg_rtx (CCFPmode);
5020 operands[3] = CONST0_RTX (SImode);
5023 (define_insn "*xststdcqp_<mode>"
5024 [(set (match_operand:CCFP 0 "" "=y")
5027 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5028 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5032 "xststdcqp %0,%1,%2"
5033 [(set_attr "type" "fpcompare")])
5035 (define_insn "*xststdc<sd>p"
5036 [(set (match_operand:CCFP 0 "" "=y")
5038 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5039 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5041 (match_operand:SI 3 "zero_constant" "j")))]
5043 "xststdc<sd>p %0,%x1,%2"
5044 [(set_attr "type" "fpcompare")])
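
;; These test-data-class patterns serve the scalar_test_data_class
;; builtin. A sketch of a use; the 7-bit mask selects the conditions to
;; test, and 0x40 is assumed here to be the NaN bit of the DCMX mask:
;;
;;   #include <altivec.h>
;;   int is_nan_dc (double d) { return scalar_test_data_class (d, 0x40); }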
5046 ;; VSX Vector Extract Exponent Double and Single Precision
5047 (define_insn "xvxexp<sd>p"
5048 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5050 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5053 "xvxexp<sd>p %x0,%x1"
5054 [(set_attr "type" "vecsimple")])
5056 ;; VSX Vector Extract Significand Double and Single Precision
5057 (define_insn "xvxsig<sd>p"
5058 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5060 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5063 "xvxsig<sd>p %x0,%x1"
5064 [(set_attr "type" "vecsimple")])
5066 ;; VSX Vector Insert Exponent Double and Single Precision
5067 (define_insn "xviexp<sd>p"
5068 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5070 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5071 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5074 "xviexp<sd>p %x0,%x1,%x2"
5075 [(set_attr "type" "vecsimple")])
5077 ;; VSX Vector Test Data Class Double and Single Precision
5078 ;; The corresponding elements of the result vector are all ones
5079 ;; if any of the conditions tested by operand 3 are satisfied.
5080 (define_insn "xvtstdc<sd>p"
5081 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5083 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5084 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5085 UNSPEC_VSX_VTSTDC))]
5087 "xvtstdc<sd>p %x0,%x1,%2"
5088 [(set_attr "type" "vecsimple")])
5090 ;; ISA 3.0 String Operations Support
5092 ;; Compare vectors producing a vector result and a predicate, setting CR6
5093 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
5094 ;; v4si modes. It does not need to match the v4sf, v2df, or v2di modes,
5095 ;; because those are expanded to use Power8 instructions.
5097 (define_insn "*vsx_ne_<mode>_p"
5098 [(set (reg:CC CR6_REGNO)
5100 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5101 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5103 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5104 (ne:VSX_EXTRACT_I (match_dup 1)
5107 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5108 [(set_attr "type" "vecsimple")])
5110 (define_insn "*vector_nez_<mode>_p"
5111 [(set (reg:CC CR6_REGNO)
5112 (unspec:CC [(unspec:VI
5113 [(match_operand:VI 1 "gpc_reg_operand" "v")
5114 (match_operand:VI 2 "gpc_reg_operand" "v")]
5117 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5118 (unspec:VI [(match_dup 1)
5122 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5123 [(set_attr "type" "vecsimple")])
5125 ;; Return first position of match between vectors using natural element
5126 ;; order for both LE and BE execution modes.
5127 (define_expand "first_match_index_<mode>"
5128 [(match_operand:SI 0 "register_operand")
5129 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5130 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5131 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5136 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5137 rtx not_result = gen_reg_rtx (<MODE>mode);
5139 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5141 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5143 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5145 if (<MODE>mode == V16QImode)
5147 if (!BYTES_BIG_ENDIAN)
5148 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5150 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5154 rtx tmp = gen_reg_rtx (SImode);
5155 if (!BYTES_BIG_ENDIAN)
5156 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5158 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5159 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
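
;; The expander above backs the vec_first_match_index builtin; a sketch
;; of a use:
;;
;;   #include <altivec.h>
;;   int first_match (vector unsigned char a, vector unsigned char b)
;;   { return vec_first_match_index (a, b); }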
5164 ;; Return first position of match between vectors or end of string (EOS) using
5165 ;; natural element order for both LE and BE execution modes.
5166 (define_expand "first_match_or_eos_index_<mode>"
5167 [(match_operand:SI 0 "register_operand")
5168 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5169 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5170 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5174 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5175 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5176 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5177 rtx and_result = gen_reg_rtx (<MODE>mode);
5178 rtx result = gen_reg_rtx (<MODE>mode);
5179 rtx vzero = gen_reg_rtx (<MODE>mode);
5181 /* Vector with zeros in elements that correspond to zeros in operands. */
5182 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5183 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5184 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5185 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5187 /* Vector with ones in elements that do not match. */
5188 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5191 /* Create vector with ones in elements where there was a zero in one of
5192 the source elements or where the elements match. */
5193 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5194 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5196 if (<MODE>mode == V16QImode)
5198 if (!BYTES_BIG_ENDIAN)
5199 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5201 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5205 rtx tmp = gen_reg_rtx (SImode);
5206 if (!BYTES_BIG_ENDIAN)
5207 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5209 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5210 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5215 ;; Return first position of mismatch between vectors using natural
5216 ;; element order for both LE and BE execution modes.
5217 (define_expand "first_mismatch_index_<mode>"
5218 [(match_operand:SI 0 "register_operand")
5219 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5220 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5221 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5225 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5227 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5229 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5231 if (<MODE>mode == V16QImode)
5233 if (!BYTES_BIG_ENDIAN)
5234 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5236 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5240 rtx tmp = gen_reg_rtx (SImode);
5241 if (!BYTES_BIG_ENDIAN)
5242 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5244 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5245 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5250 ;; Return first position of mismatch between vectors or end of string (EOS)
5251 ;; using natural element order for both LE and BE execution modes.
5252 (define_expand "first_mismatch_or_eos_index_<mode>"
5253 [(match_operand:SI 0 "register_operand")
5254 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5255 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5256 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5260 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5261 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5262 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5263 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5264 rtx and_result = gen_reg_rtx (<MODE>mode);
5265 rtx result = gen_reg_rtx (<MODE>mode);
5266 rtx vzero = gen_reg_rtx (<MODE>mode);
5268 /* Vector with zeros in elements that correspond to zeros in operands. */
5269 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5271 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5272 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5273 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5275 /* Vector with ones in elements that match. */
5276 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5278 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5280 /* Create vector with ones in elements where there was a zero in one of
5281 the source elements or where the elements did not match. */
5282 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5283 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5285 if (<MODE>mode == V16QImode)
5287 if (!BYTES_BIG_ENDIAN)
5288 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5290 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5294 rtx tmp = gen_reg_rtx (SImode);
5295 if (!BYTES_BIG_ENDIAN)
5296 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5298 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5299 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5304 ;; Load VSX Vector with Length
5305 (define_expand "lxvl"
5307 (ashift:DI (match_operand:DI 2 "register_operand")
5309 (set (match_operand:V16QI 0 "vsx_register_operand")
5311 [(match_operand:DI 1 "gpc_reg_operand")
5312 (mem:V16QI (match_dup 1))
5315 "TARGET_P9_VECTOR && TARGET_64BIT"
5317 operands[3] = gen_reg_rtx (DImode);
5320 (define_insn "*lxvl"
5321 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5323 [(match_operand:DI 1 "gpc_reg_operand" "b")
5324 (mem:V16QI (match_dup 1))
5325 (match_operand:DI 2 "register_operand" "r")]
5327 "TARGET_P9_VECTOR && TARGET_64BIT"
5329 [(set_attr "type" "vecload")])
5331 (define_insn "lxvll"
5332 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5333 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5334 (mem:V16QI (match_dup 1))
5335 (match_operand:DI 2 "register_operand" "r")]
5339 [(set_attr "type" "vecload")])
5341 ;; Expand for builtin xl_len_r
5342 (define_expand "xl_len_r"
5343 [(match_operand:V16QI 0 "vsx_register_operand")
5344 (match_operand:DI 1 "register_operand")
5345 (match_operand:DI 2 "register_operand")]
5348 rtx shift_mask = gen_reg_rtx (V16QImode);
5349 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5350 rtx tmp = gen_reg_rtx (DImode);
5352 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5353 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5354 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5355 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5360 (define_insn "stxvll"
5361 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5362 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5363 (mem:V16QI (match_dup 1))
5364 (match_operand:DI 2 "register_operand" "r")]
5368 [(set_attr "type" "vecstore")])
5370 ;; Store VSX Vector with Length
5371 (define_expand "stxvl"
5373 (ashift:DI (match_operand:DI 2 "register_operand")
5375 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5377 [(match_operand:V16QI 0 "vsx_register_operand")
5378 (mem:V16QI (match_dup 1))
5381 "TARGET_P9_VECTOR && TARGET_64BIT"
5383 operands[3] = gen_reg_rtx (DImode);
5386 ;; Define optabs for vector access with length, for the vectorizer to exploit.
5387 (define_expand "len_load_v16qi"
5388 [(match_operand:V16QI 0 "vlogical_operand")
5389 (match_operand:V16QI 1 "memory_operand")
5390 (match_operand:QI 2 "gpc_reg_operand")]
5391 "TARGET_P9_VECTOR && TARGET_64BIT"
5393 rtx mem = XEXP (operands[1], 0);
5394 mem = force_reg (DImode, mem);
5395 rtx len = gen_lowpart (DImode, operands[2]);
5396 emit_insn (gen_lxvl (operands[0], mem, len));
5400 (define_expand "len_store_v16qi"
5401 [(match_operand:V16QI 0 "memory_operand")
5402 (match_operand:V16QI 1 "vlogical_operand")
5403 (match_operand:QI 2 "gpc_reg_operand")
5405 "TARGET_P9_VECTOR && TARGET_64BIT"
5407 rtx mem = XEXP (operands[0], 0);
5408 mem = force_reg (DImode, mem);
5409 rtx len = gen_lowpart (DImode, operands[2]);
5410 emit_insn (gen_stxvl (operands[1], mem, len));
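
;; Semantically, len_load/len_store touch only the first LEN bytes of the
;; 16-byte location; roughly, as a scalar sketch of the intent:
;;
;;   for (i = 0; i < len && i < 16; i++)
;;     dst[i] = src[i];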
5414 (define_insn "*stxvl"
5415 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5417 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5418 (mem:V16QI (match_dup 1))
5419 (match_operand:DI 2 "register_operand" "r")]
5421 "TARGET_P9_VECTOR && TARGET_64BIT"
5423 [(set_attr "type" "vecstore")])
5425 ;; Expand for builtin xst_len_r
5426 (define_expand "xst_len_r"
5427 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5428 (match_operand:DI 1 "register_operand" "b")
5429 (match_operand:DI 2 "register_operand" "r")]
5432 rtx shift_mask = gen_reg_rtx (V16QImode);
5433 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5434 rtx tmp = gen_reg_rtx (DImode);
5436 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5437 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5439 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5440 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5444 ;; Vector Compare Not Equal Byte (expressed as not (eq ...))
5445 (define_insn "vcmpneb"
5446 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5448 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5449 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5452 [(set_attr "type" "vecsimple")])
5454 ;; Vector Compare Not Equal or Zero Byte
5455 (define_insn "vcmpnezb"
5456 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5458 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5459 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5463 [(set_attr "type" "vecsimple")])
5465 ;; Vector Compare Not Equal or Zero Byte predicate or record-form
5466 (define_insn "vcmpnezb_p"
5467 [(set (reg:CC CR6_REGNO)
5469 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5470 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5472 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5478 "vcmpnezb. %0,%1,%2"
5479 [(set_attr "type" "vecsimple")])
5481 ;; Vector Compare Not Equal Half Word (expressed as not (eq ...))
5482 (define_insn "vcmpneh"
5483 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5485 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5486 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5489 [(set_attr "type" "vecsimple")])
5491 ;; Vector Compare Not Equal or Zero Half Word
5492 (define_insn "vcmpnezh"
5493 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5494 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5495 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5499 [(set_attr "type" "vecsimple")])
5501 ;; Vector Compare Not Equal Word (expressed as not (eq ...))
5502 (define_insn "vcmpnew"
5503 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5505 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5506 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5509 [(set_attr "type" "vecsimple")])
5511 ;; Vector Compare Not Equal or Zero Word
5512 (define_insn "vcmpnezw"
5513 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5514 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5515 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5519 [(set_attr "type" "vecsimple")])
5521 ;; Vector Count Leading Zero Least-Significant Bits Byte
5522 (define_insn "vclzlsbb_<mode>"
5523 [(set (match_operand:SI 0 "register_operand" "=r")
5525 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5529 [(set_attr "type" "vecsimple")])
5531 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5532 (define_insn "vctzlsbb_<mode>"
5533 [(set (match_operand:SI 0 "register_operand" "=r")
5535 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5539 [(set_attr "type" "vecsimple")])
5541 ;; Vector Extract Unsigned Byte Left-Indexed
5542 (define_insn "vextublx"
5543 [(set (match_operand:SI 0 "register_operand" "=r")
5545 [(match_operand:SI 1 "register_operand" "r")
5546 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5550 [(set_attr "type" "vecsimple")])
5552 ;; Vector Extract Unsigned Byte Right-Indexed
5553 (define_insn "vextubrx"
5554 [(set (match_operand:SI 0 "register_operand" "=r")
5556 [(match_operand:SI 1 "register_operand" "r")
5557 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5561 [(set_attr "type" "vecsimple")])
5563 ;; Vector Extract Unsigned Half Word Left-Indexed
5564 (define_insn "vextuhlx"
5565 [(set (match_operand:SI 0 "register_operand" "=r")
5567 [(match_operand:SI 1 "register_operand" "r")
5568 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5572 [(set_attr "type" "vecsimple")])
5574 ;; Vector Extract Unsigned Half Word Right-Indexed
5575 (define_insn "vextuhrx"
5576 [(set (match_operand:SI 0 "register_operand" "=r")
5578 [(match_operand:SI 1 "register_operand" "r")
5579 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5583 [(set_attr "type" "vecsimple")])
5585 ;; Vector Extract Unsigned Word Left-Indexed
5586 (define_insn "vextuwlx"
5587 [(set (match_operand:SI 0 "register_operand" "=r")
5589 [(match_operand:SI 1 "register_operand" "r")
5590 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5594 [(set_attr "type" "vecsimple")])
5596 ;; Vector Extract Unsigned Word Right-Indexed
5597 (define_insn "vextuwrx"
5598 [(set (match_operand:SI 0 "register_operand" "=r")
5600 [(match_operand:SI 1 "register_operand" "r")
5601 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5605 [(set_attr "type" "vecsimple")])
5607 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5608 ;; endian version needs to adjust the byte number, and the V4SI element in
5609 ;; insert4b must first be swapped into the expected position.
5610 (define_insn "extract4b"
5611 [(set (match_operand:V2DI 0 "vsx_register_operand")
5612 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5613 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5614 UNSPEC_XXEXTRACTUW))]
5617 if (!BYTES_BIG_ENDIAN)
5618 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5620 return "xxextractuw %x0,%x1,%2";
5623 (define_expand "insert4b"
5624 [(set (match_operand:V16QI 0 "vsx_register_operand")
5625 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5626 (match_operand:V16QI 2 "vsx_register_operand")
5627 (match_operand:QI 3 "const_0_to_12_operand")]
5631 if (!BYTES_BIG_ENDIAN)
5633 rtx op1 = operands[1];
5634 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5635 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5636 operands[1] = v4si_tmp;
5637 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5641 (define_insn "*insert4b_internal"
5642 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5643 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5644 (match_operand:V16QI 2 "vsx_register_operand" "0")
5645 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5648 "xxinsertw %x0,%x1,%3"
5649 [(set_attr "type" "vecperm")])
5652 ;; Generate a vector extract of four float-32 values from the left four
5653 ;; elements of an eight-element vector of float-16 values.
5654 (define_expand "vextract_fp_from_shorth"
5655 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5656 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5657 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5661 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5662 int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
5665 rtx mask = gen_reg_rtx (V16QImode);
5666 rtx tmp = gen_reg_rtx (V16QImode);
5669 for (i = 0; i < 16; i++)
5670 if (!BYTES_BIG_ENDIAN)
5671 rvals[i] = GEN_INT (vals_le[i]);
5673 rvals[i] = GEN_INT (vals_be[i]);
5675 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5676 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5677 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5678 conversion instruction. */
5679 v = gen_rtvec_v (16, rvals);
5680 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5681 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5682 operands[1], mask));
5683 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5687 ;; Generate a vector extract of four float-32 values from the right four
5688 ;; elements of an eight-element vector of float-16 values.
5689 (define_expand "vextract_fp_from_shortl"
5690 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5691 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5692 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5695 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5696 int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
5700 rtx mask = gen_reg_rtx (V16QImode);
5701 rtx tmp = gen_reg_rtx (V16QImode);
5704 for (i = 0; i < 16; i++)
5705 if (!BYTES_BIG_ENDIAN)
5706 rvals[i] = GEN_INT (vals_le[i]);
5708 rvals[i] = GEN_INT (vals_be[i]);
5710 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5711 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5712 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5713 conversion instruction. */
5714 v = gen_rtvec_v (16, rvals);
5715 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5716 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5717 operands[1], mask));
5718 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
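;; A hedged C model of the net effect of the two expanders above, under one
;; plausible reading where "left" means the low-numbered vector lanes (the
;; fp16_to_f32 helper is assumed, not part of GCC):
;;
;;	#include <stdint.h>
;;
;;	extern float fp16_to_f32 (uint16_t);	/* assumed helper */
;;
;;	void extract_fp_from_shorth (float out[4], const uint16_t in[8])
;;	{
;;	  for (int i = 0; i < 4; i++)		/* left four elements */
;;	    out[i] = fp16_to_f32 (in[i]);
;;	}
;;
;;	void extract_fp_from_shortl (float out[4], const uint16_t in[8])
;;	{
;;	  for (int i = 0; i < 4; i++)		/* right four elements */
;;	    out[i] = fp16_to_f32 (in[i + 4]);
;;	}
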
;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* Want to have the elements in reverse order relative
	 to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])

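;; For reference, a hedged C model of one member of the family: xxbrw
;; byte-reverses each 32-bit element independently (GCC's __builtin_bswap32
;; is used purely for illustration; the function name is ours):
;;
;;	#include <stdint.h>
;;
;;	void xxbrw_model (uint32_t v[4])
;;	{
;;	  for (int i = 0; i < 4; i++)
;;	    v[i] = __builtin_bswap32 (v[i]);	/* swap within each word */
;;	}
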
;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)		;; GPR temporary
   (SFBOOL_TMP_VSX	 1)		;; vector temporary
   (SFBOOL_MFVSR_D	 2)		;; move to gpr dest
   (SFBOOL_MFVSR_A	 3)		;; move to gpr src
   (SFBOOL_BOOL_D	 4)		;; and/ior/xor dest
   (SFBOOL_BOOL_A1	 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)		;; and/ior/xor arg2
   (SFBOOL_SHL_D	 7)		;; shift left dest
   (SFBOOL_SHL_A	 8)		;; shift left arg
   (SFBOOL_MTVSR_D	 9)		;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	10)		;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	11)		;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	13)])		;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
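;;
;; For reference, a self-contained, compilable form of the GLIBC idiom the
;; peephole targets (a sketch; the function name is ours, and GLIBC's
;; GET/SET_FLOAT_WORD macros expand to the do/while form shown above):
;;
;;	#include <stdint.h>
;;
;;	typedef union { float value; uint32_t word; } ieee_float_shape_type;
;;
;;	float mask_mantissa (float t1)
;;	{
;;	  ieee_float_shape_type u;
;;	  u.value = t1;
;;	  u.word &= 0xfffff000;		/* the AND done in integer regs */
;;	  return u.value;
;;	}
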
(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})

;; Support signed/unsigned long long to float conversion vectorization.
;; Note that any_float (pc) here is just for code attribute <su>.
(define_expand "vec_pack<su>_float_v2di"
  [(match_operand:V4SF 0 "vfloat_operand")
   (match_operand:V2DI 1 "vint_operand")
   (match_operand:V2DI 2 "vint_operand")
   (any_float (pc))]
  "TARGET_VSX"
{
  rtx r1 = gen_reg_rtx (V4SFmode);
  rtx r2 = gen_reg_rtx (V4SFmode);
  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
  rs6000_expand_extract_even (operands[0], r1, r2);
  DONE;
})

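;; Net semantics, as a hedged C model (the signed flavor; each V2DI input is
;; converted and the results are packed into one V4SF; function name ours):
;;
;;	#include <stdint.h>
;;
;;	void vec_pack_float_v2di_model (float out[4], const int64_t a[2],
;;					const int64_t b[2])
;;	{
;;	  out[0] = (float) a[0];
;;	  out[1] = (float) a[1];
;;	  out[2] = (float) b[0];
;;	  out[3] = (float) b[1];
;;	}
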
;; Support float to signed/unsigned long long conversion vectorization.
;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

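;; Hedged C models of the two expanders above, assuming the convention that
;; the "hi" optab converts the low-numbered lanes and "lo" the rest (the
;; BYTES_BIG_ENDIAN argument to the interleave keeps the lane choice
;; consistent across endiannesses; function names ours):
;;
;;	#include <stdint.h>
;;
;;	void unpack_fix_trunc_hi (int64_t out[2], const float in[4])
;;	{
;;	  out[0] = (int64_t) in[0];
;;	  out[1] = (int64_t) in[1];
;;	}
;;
;;	void unpack_fix_trunc_lo (int64_t out[2], const float in[4])
;;	{
;;	  out[0] = (int64_t) in[2];
;;	  out[1] = (int64_t) in[3];
;;	}
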
(define_insn "vsx_<xvcvbf16>"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
		      XVCVBF16))]
  "TARGET_POWER10"
  "<xvcvbf16> %x0,%x1"
  [(set_attr "type" "vecfloat")])

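;; bfloat16 is the high half of an IEEE single.  A hedged C model of the
;; widening direction only (the narrowing direction also rounds, so it is
;; not just a shift; function name ours):
;;
;;	#include <stdint.h>
;;	#include <string.h>
;;
;;	float bf16_to_f32 (uint16_t b)
;;	{
;;	  uint32_t w = (uint32_t) b << 16;	/* restore low-order zeros */
;;	  float f;
;;	  memcpy (&f, &w, sizeof f);
;;	  return f;
;;	}
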
(define_insn "vec_mtvsrbmi"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
	UNSPEC_MTVSBM))]
  "TARGET_POWER10"
  "mtvsrbmi %0,%1"
)

(define_insn "vec_mtvsr_<mode>"
  [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
	(unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
	UNSPEC_MTVSBM))]
  "TARGET_POWER10"
  "mtvsr<VSX_MM_SUFFIX>m %0,%1"
  [(set_attr "type" "vecsimple")])

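;; A hedged C model of the byte form: each of the 16 low-order mask bits
;; expands to one byte of the result, with element 0 taking the most
;; significant bit of the 16-bit field (function name ours):
;;
;;	#include <stdint.h>
;;
;;	void mtvsrbm_model (uint8_t out[16], uint16_t mask)
;;	{
;;	  for (int i = 0; i < 16; i++)
;;	    out[i] = ((mask >> (15 - i)) & 1) ? 0xff : 0x00;
;;	}
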
(define_insn "vec_cntmb_<mode>"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
	(unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
		    (match_operand:QI 2 "const_0_to_1_operand" "n")]
	UNSPEC_VCNTMB))]
  "TARGET_POWER10"
  "vcntmb<VSX_MM_SUFFIX> %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "vec_extract_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
	UNSPEC_VEXTRACT))]
  "TARGET_POWER10"
  "vextract<VSX_MM_SUFFIX>m %0,%1"
  [(set_attr "type" "vecsimple")])

(define_insn "vec_expand_<mode>"
  [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
	(unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
	UNSPEC_VEXPAND))]
  "TARGET_POWER10"
  "vexpand<VSX_MM_SUFFIX>m %0,%1"
  [(set_attr "type" "vecsimple")])
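
;; Hedged C models of the byte forms of the mask extract/expand pair,
;; assuming element 0's bit occupies the most significant position of the
;; extracted mask field (function names ours):
;;
;;	#include <stdint.h>
;;
;;	uint32_t vextractbm_model (const uint8_t vr[16])
;;	{
;;	  uint32_t mask = 0;
;;	  for (int i = 0; i < 16; i++)
;;	    mask = (mask << 1) | (vr[i] >> 7);	/* gather sign bits */
;;	  return mask;
;;	}
;;
;;	void vexpandbm_model (uint8_t out[16], const uint8_t in[16])
;;	{
;;	  for (int i = 0; i < 16; i++)
;;	    out[i] = (in[i] & 0x80) ? 0xff : 0x00;	/* broadcast sign bit */
;;	}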