2 ;; Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for 128-bit integer types that go in a single vector register.
41 (define_mode_iterator VSX_TI [TI V1TI])
43 ;; Iterator for the 2 32-bit vector types
44 (define_mode_iterator VSX_W [V4SF V4SI])
46 ;; Iterator for the DF types
47 (define_mode_iterator VSX_DF [V2DF DF])
49 ;; Iterator for vector floating point types supported by VSX
50 (define_mode_iterator VSX_F [V4SF V2DF])
52 ;; Iterator for logical types supported by VSX
53 (define_mode_iterator VSX_L [V16QI
61 (KF "FLOAT128_VECTOR_P (KFmode)")
62 (TF "FLOAT128_VECTOR_P (TFmode)")])
64 ;; Iterator for memory moves.
65 (define_mode_iterator VSX_M [V16QI
72 (KF "FLOAT128_VECTOR_P (KFmode)")
73 (TF "FLOAT128_VECTOR_P (TFmode)")
76 (define_mode_attr VSX_XXBR [(V8HI "h")
83 ;; Map into the appropriate load/store name based on the type
84 (define_mode_attr VSm [(V16QI "vw4")
96 ;; Map the register class used
97 (define_mode_attr VSr [(V16QI "v")
111 ;; What value we need in the "isa" field, to make the IEEE QP float work.
112 (define_mode_attr VSisa [(V16QI "*")
126 ;; A mode attribute to disparage use of GPR registers, except for scalar
128 (define_mode_attr ??r [(V16QI "??r")
139 ;; A mode attribute used for 128-bit constant values.
140 (define_mode_attr nW [(V16QI "W")
151 ;; Same size integer type for floating point data
152 (define_mode_attr VSi [(V4SF "v4si")
156 (define_mode_attr VSI [(V4SF "V4SI")
160 ;; Word size for same size conversion
161 (define_mode_attr VSc [(V4SF "w")
165 ;; Map into either s or v, depending on whether this is a scalar or vector
167 (define_mode_attr VSv [(V16QI "v")
177 ;; Appropriate type for add ops (and other simple FP ops)
178 (define_mode_attr VStype_simple [(V2DF "vecdouble")
182 ;; Appropriate type for multiply ops
183 (define_mode_attr VStype_mul [(V2DF "vecdouble")
187 ;; Appropriate type for divide ops.
188 (define_mode_attr VStype_div [(V2DF "vecdiv")
192 ;; Map the scalar mode for a vector type
193 (define_mode_attr VS_scalar [(V1TI "TI")
201 ;; Map to a double-sized vector mode
202 (define_mode_attr VS_double [(V4SI "V8SI")
208 ;; Iterators for loading constants with xxspltib
209 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
210 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
212 ;; Vector reverse byte modes
213 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
215 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
216 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
217 ;; done on ISA 2.07 and not just ISA 3.0.
218 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
219 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
220 (define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])
222 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
226 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
227 ;; insert to validate the operand number.
228 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
229 (V8HI "const_0_to_7_operand")
230 (V4SI "const_0_to_3_operand")])
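;; As an illustrative example (not from the original sources): a V8HI element
;; number is checked against const_0_to_7_operand, so something like
;;   vec_extract ((vector unsigned short) x, 7)
;; uses the highest element number these ISA 3.0 extract patterns accept.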
232 ;; Mode attribute to give the constraint for vector extract and insert
234 (define_mode_attr VSX_EX [(V16QI "v")
238 ;; Mode iterator for binary floating types other than double to
239 ;; optimize converting to that floating point type from an extract
240 ;; of an integer type
241 (define_mode_iterator VSX_EXTRACT_FL [SF
242 (IF "FLOAT128_2REG_P (IFmode)")
243 (KF "TARGET_FLOAT128_HW")
244 (TF "FLOAT128_2REG_P (TFmode)
245 || (FLOAT128_IEEE_P (TFmode)
246 && TARGET_FLOAT128_HW)")])
248 ;; Mode iterator for binary floating types that have a direct conversion
249 ;; from 64-bit integer to floating point
250 (define_mode_iterator FL_CONV [SF
252 (KF "TARGET_FLOAT128_HW")
253 (TF "TARGET_FLOAT128_HW
254 && FLOAT128_IEEE_P (TFmode)")])
256 ;; Iterator for the 2 short vector types to do a splat from an integer
257 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
259 ;; Mode attribute to give the count for the splat instruction to splat
260 ;; the value in the 64-bit integer slot
261 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
263 ;; Mode attribute to give the suffix for the splat instruction
264 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
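;; Illustrative sketch of how the two attributes combine (assuming the usual
;; mtvsrdd + vspltb/vsplth sequence): V16QI yields vspltb ...,7 and V8HI
;; yields vsplth ...,3, i.e. the count picks the low-order element of the
;; 64-bit value, which is then replicated across the vector.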
266 ;; Iterator for the move to mask instructions
267 (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
268 (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
270 ;; Longer vec int modes for rotate/mask ops
271 ;; and Vector Integer Multiply/Divide/Modulo Instructions
272 (define_mode_iterator VIlong [V2DI V4SI])
274 ;; Constants for creating unspecs
275 (define_c_enum "unspec"
288 UNSPEC_VSX_UNS_FLOAT2
290 UNSPEC_VSX_UNS_FLOATE
292 UNSPEC_VSX_UNS_FLOATO
306 UNSPEC_VSX_SIGN_EXTEND
307 UNSPEC_VSX_XVCVBF16SPN
308 UNSPEC_VSX_XVCVSPBF16
309 UNSPEC_VSX_XVCVSPSXDS
320 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
321 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
353 UNSPEC_VSX_FIRST_MATCH_INDEX
354 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
355 UNSPEC_VSX_FIRST_MISMATCH_INDEX
356 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
360 UNSPEC_MTVSRD_DITI_W1
374 (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
375 UNSPEC_VSX_XVCVBF16SPN])
377 (define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
378 (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])
380 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
381 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
384 ;; Vector replace_elt iterator/attrs for 32-bit and 64-bit elements
384 (define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
385 (define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
386 (V2DI "d") (V2DF "d")])
387 (define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
388 (V2DI "3") (V2DF "3")])
389 (define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
390 (V2DI "8") (V2DF "8")])
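;; Sanity check of the values above (illustrative): REPLACE_ELT_sh is log2 of
;; the element size in bytes and REPLACE_ELT_max is 16 minus that size, so a
;; V4SI element number n maps to byte offset n << 2, e.g. element 3 -> byte 12,
;; the largest offset allowed for word elements.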
394 ;; The patterns for LE permuted loads and stores come before the general
395 ;; VSX moves so they match first.
396 (define_insn_and_split "*vsx_le_perm_load_<mode>"
397 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
398 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
399 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
405 (parallel [(const_int 1) (const_int 0)])))
409 (parallel [(const_int 1) (const_int 0)])))]
411 rtx mem = operands[1];
413 /* Don't apply the swap optimization if we've already performed register
414 allocation and the hard register destination is not in the altivec
416 if ((MEM_ALIGN (mem) >= 128)
417 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
418 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
420 rtx mem_address = XEXP (mem, 0);
421 enum machine_mode mode = GET_MODE (mem);
423 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
425 /* Replace the source memory address with masked address. */
426 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
427 emit_insn (lvx_set_expr);
430 else if (rs6000_quadword_masked_address_p (mem_address))
432 /* This rtl is already in the form that matches lvx
433 instruction, so leave it alone. */
436 /* Otherwise, fall through to transform into a swapping load. */
438 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
441 [(set_attr "type" "vecload")
442 (set_attr "length" "8")])
444 (define_insn_and_split "*vsx_le_perm_load_<mode>"
445 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
446 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
447 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
453 (parallel [(const_int 2) (const_int 3)
454 (const_int 0) (const_int 1)])))
458 (parallel [(const_int 2) (const_int 3)
459 (const_int 0) (const_int 1)])))]
461 rtx mem = operands[1];
463 /* Don't apply the swap optimization if we've already performed register
464 allocation and the hard register destination is not in the altivec
466 if ((MEM_ALIGN (mem) >= 128)
467 && (!HARD_REGISTER_P (operands[0])
468 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
470 rtx mem_address = XEXP (mem, 0);
471 enum machine_mode mode = GET_MODE (mem);
473 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
475 /* Replace the source memory address with masked address. */
476 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
477 emit_insn (lvx_set_expr);
480 else if (rs6000_quadword_masked_address_p (mem_address))
482 /* This rtl is already in the form that matches lvx
483 instruction, so leave it alone. */
486 /* Otherwise, fall through to transform into a swapping load. */
488 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
491 [(set_attr "type" "vecload")
492 (set_attr "length" "8")])
494 (define_insn_and_split "*vsx_le_perm_load_v8hi"
495 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
496 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
497 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
503 (parallel [(const_int 4) (const_int 5)
504 (const_int 6) (const_int 7)
505 (const_int 0) (const_int 1)
506 (const_int 2) (const_int 3)])))
510 (parallel [(const_int 4) (const_int 5)
511 (const_int 6) (const_int 7)
512 (const_int 0) (const_int 1)
513 (const_int 2) (const_int 3)])))]
515 rtx mem = operands[1];
517 /* Don't apply the swap optimization if we've already performed register
518 allocation and the hard register destination is not in the altivec
520 if ((MEM_ALIGN (mem) >= 128)
521 && (!HARD_REGISTER_P (operands[0])
522 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
524 rtx mem_address = XEXP (mem, 0);
525 enum machine_mode mode = GET_MODE (mem);
527 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
529 /* Replace the source memory address with masked address. */
530 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
531 emit_insn (lvx_set_expr);
534 else if (rs6000_quadword_masked_address_p (mem_address))
536 /* This rtl is already in the form that matches lvx
537 instruction, so leave it alone. */
540 /* Otherwise, fall through to transform into a swapping load. */
542 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
545 [(set_attr "type" "vecload")
546 (set_attr "length" "8")])
548 (define_insn_and_split "*vsx_le_perm_load_v16qi"
549 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
550 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
551 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
557 (parallel [(const_int 8) (const_int 9)
558 (const_int 10) (const_int 11)
559 (const_int 12) (const_int 13)
560 (const_int 14) (const_int 15)
561 (const_int 0) (const_int 1)
562 (const_int 2) (const_int 3)
563 (const_int 4) (const_int 5)
564 (const_int 6) (const_int 7)])))
568 (parallel [(const_int 8) (const_int 9)
569 (const_int 10) (const_int 11)
570 (const_int 12) (const_int 13)
571 (const_int 14) (const_int 15)
572 (const_int 0) (const_int 1)
573 (const_int 2) (const_int 3)
574 (const_int 4) (const_int 5)
575 (const_int 6) (const_int 7)])))]
577 rtx mem = operands[1];
579 /* Don't apply the swap optimization if we've already performed register
580 allocation and the hard register destination is not in the altivec
582 if ((MEM_ALIGN (mem) >= 128)
583 && (!HARD_REGISTER_P (operands[0])
584 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
586 rtx mem_address = XEXP (mem, 0);
587 enum machine_mode mode = GET_MODE (mem);
589 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
591 /* Replace the source memory address with masked address. */
592 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
593 emit_insn (lvx_set_expr);
596 else if (rs6000_quadword_masked_address_p (mem_address))
598 /* This rtl is already in the form that matches lvx
599 instruction, so leave it alone. */
602 /* Otherwise, fall through to transform into a swapping load. */
604 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
607 [(set_attr "type" "vecload")
608 (set_attr "length" "8")])
610 (define_insn "*vsx_le_perm_store_<mode>"
611 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
612 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
613 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
615 [(set_attr "type" "vecstore")
616 (set_attr "length" "12")])
619 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
620 (match_operand:VSX_D 1 "vsx_register_operand"))]
621 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
625 (parallel [(const_int 1) (const_int 0)])))
629 (parallel [(const_int 1) (const_int 0)])))]
631 rtx mem = operands[0];
633 /* Don't apply the swap optimization if we've already performed register
634 allocation and the hard register source is not in the altivec range. */
635 if ((MEM_ALIGN (mem) >= 128)
636 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
637 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
639 rtx mem_address = XEXP (mem, 0);
640 enum machine_mode mode = GET_MODE (mem);
641 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
643 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
644 emit_insn (stvx_set_expr);
647 else if (rs6000_quadword_masked_address_p (mem_address))
649 /* This rtl is already in the form that matches stvx instruction,
650 so leave it alone. */
653 /* Otherwise, fall through to transform into a swapping store. */
656 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
660 ;; The post-reload split requires that we re-permute the source
661 ;; register in case it is still live.
663 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
664 (match_operand:VSX_D 1 "vsx_register_operand"))]
665 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
669 (parallel [(const_int 1) (const_int 0)])))
673 (parallel [(const_int 1) (const_int 0)])))
677 (parallel [(const_int 1) (const_int 0)])))]
680 (define_insn "*vsx_le_perm_store_<mode>"
681 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
682 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
683 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
685 [(set_attr "type" "vecstore")
686 (set_attr "length" "12")])
689 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
690 (match_operand:VSX_W 1 "vsx_register_operand"))]
691 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
695 (parallel [(const_int 2) (const_int 3)
696 (const_int 0) (const_int 1)])))
700 (parallel [(const_int 2) (const_int 3)
701 (const_int 0) (const_int 1)])))]
703 rtx mem = operands[0];
705 /* Don't apply the swap optimization if we've already performed register
706 allocation and the hard register source is not in the altivec range. */
707 if ((MEM_ALIGN (mem) >= 128)
708 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
709 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
711 rtx mem_address = XEXP (mem, 0);
712 enum machine_mode mode = GET_MODE (mem);
713 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
715 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
716 emit_insn (stvx_set_expr);
719 else if (rs6000_quadword_masked_address_p (mem_address))
721 /* This rtl is already in the form that matches stvx instruction,
722 so leave it alone. */
725 /* Otherwise, fall through to transform into a swapping store. */
728 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
732 ;; The post-reload split requires that we re-permute the source
733 ;; register in case it is still live.
735 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
736 (match_operand:VSX_W 1 "vsx_register_operand"))]
737 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
741 (parallel [(const_int 2) (const_int 3)
742 (const_int 0) (const_int 1)])))
746 (parallel [(const_int 2) (const_int 3)
747 (const_int 0) (const_int 1)])))
751 (parallel [(const_int 2) (const_int 3)
752 (const_int 0) (const_int 1)])))]
755 (define_insn "*vsx_le_perm_store_v8hi"
756 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
757 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
758 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
760 [(set_attr "type" "vecstore")
761 (set_attr "length" "12")])
764 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
765 (match_operand:V8HI 1 "vsx_register_operand"))]
766 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
770 (parallel [(const_int 4) (const_int 5)
771 (const_int 6) (const_int 7)
772 (const_int 0) (const_int 1)
773 (const_int 2) (const_int 3)])))
777 (parallel [(const_int 4) (const_int 5)
778 (const_int 6) (const_int 7)
779 (const_int 0) (const_int 1)
780 (const_int 2) (const_int 3)])))]
782 rtx mem = operands[0];
784 /* Don't apply the swap optimization if we've already performed register
785 allocation and the hard register source is not in the altivec range. */
786 if ((MEM_ALIGN (mem) >= 128)
787 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
788 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
790 rtx mem_address = XEXP (mem, 0);
791 enum machine_mode mode = GET_MODE (mem);
792 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
794 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
795 emit_insn (stvx_set_expr);
798 else if (rs6000_quadword_masked_address_p (mem_address))
800 /* This rtl is already in the form that matches stvx instruction,
801 so leave it alone. */
804 /* Otherwise, fall through to transform into a swapping store. */
807 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
811 ;; The post-reload split requires that we re-permute the source
812 ;; register in case it is still live.
814 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
815 (match_operand:V8HI 1 "vsx_register_operand"))]
816 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
820 (parallel [(const_int 4) (const_int 5)
821 (const_int 6) (const_int 7)
822 (const_int 0) (const_int 1)
823 (const_int 2) (const_int 3)])))
827 (parallel [(const_int 4) (const_int 5)
828 (const_int 6) (const_int 7)
829 (const_int 0) (const_int 1)
830 (const_int 2) (const_int 3)])))
834 (parallel [(const_int 4) (const_int 5)
835 (const_int 6) (const_int 7)
836 (const_int 0) (const_int 1)
837 (const_int 2) (const_int 3)])))]
840 (define_insn "*vsx_le_perm_store_v16qi"
841 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
842 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
843 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
845 [(set_attr "type" "vecstore")
846 (set_attr "length" "12")])
849 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
850 (match_operand:V16QI 1 "vsx_register_operand"))]
851 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
855 (parallel [(const_int 8) (const_int 9)
856 (const_int 10) (const_int 11)
857 (const_int 12) (const_int 13)
858 (const_int 14) (const_int 15)
859 (const_int 0) (const_int 1)
860 (const_int 2) (const_int 3)
861 (const_int 4) (const_int 5)
862 (const_int 6) (const_int 7)])))
866 (parallel [(const_int 8) (const_int 9)
867 (const_int 10) (const_int 11)
868 (const_int 12) (const_int 13)
869 (const_int 14) (const_int 15)
870 (const_int 0) (const_int 1)
871 (const_int 2) (const_int 3)
872 (const_int 4) (const_int 5)
873 (const_int 6) (const_int 7)])))]
875 rtx mem = operands[0];
877 /* Don't apply the swap optimization if we've already performed register
878 allocation and the hard register source is not in the altivec range. */
879 if ((MEM_ALIGN (mem) >= 128)
880 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
881 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
883 rtx mem_address = XEXP (mem, 0);
884 enum machine_mode mode = GET_MODE (mem);
885 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
887 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
888 emit_insn (stvx_set_expr);
891 else if (rs6000_quadword_masked_address_p (mem_address))
893 /* This rtl is already in the form that matches stvx instruction,
894 so leave it alone. */
897 /* Otherwise, fall through to transform into a swapping store. */
900 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
904 ;; The post-reload split requires that we re-permute the source
905 ;; register in case it is still live.
907 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
908 (match_operand:V16QI 1 "vsx_register_operand"))]
909 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
913 (parallel [(const_int 8) (const_int 9)
914 (const_int 10) (const_int 11)
915 (const_int 12) (const_int 13)
916 (const_int 14) (const_int 15)
917 (const_int 0) (const_int 1)
918 (const_int 2) (const_int 3)
919 (const_int 4) (const_int 5)
920 (const_int 6) (const_int 7)])))
924 (parallel [(const_int 8) (const_int 9)
925 (const_int 10) (const_int 11)
926 (const_int 12) (const_int 13)
927 (const_int 14) (const_int 15)
928 (const_int 0) (const_int 1)
929 (const_int 2) (const_int 3)
930 (const_int 4) (const_int 5)
931 (const_int 6) (const_int 7)])))
935 (parallel [(const_int 8) (const_int 9)
936 (const_int 10) (const_int 11)
937 (const_int 12) (const_int 13)
938 (const_int 14) (const_int 15)
939 (const_int 0) (const_int 1)
940 (const_int 2) (const_int 3)
941 (const_int 4) (const_int 5)
942 (const_int 6) (const_int 7)])))]
945 ;; Little endian word swapping for 128-bit types that are either scalars or the
946 ;; special V1TI container class, for which vec_select is not appropriate.
948 (define_insn "*vsx_le_permute_<mode>"
949 [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
951 (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
953 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
955 xxpermdi %x0,%x1,%x1,2
959 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
960 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
961 [(set_attr "length" "*,*,*,8,8,8")
962 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
964 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
965 [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
968 (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
971 "!BYTES_BIG_ENDIAN && TARGET_VSX"
976 [(set (match_dup 0) (match_dup 1))]
978 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
980 emit_note (NOTE_INSN_DELETED);
984 [(set_attr "length" "0,4")
985 (set_attr "type" "veclogical")])
987 (define_insn_and_split "*vsx_le_perm_load_<mode>"
988 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
989 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
990 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
991 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
995 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
996 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
999 rtx tmp = (can_create_pseudo_p ()
1000 ? gen_reg_rtx_and_attrs (operands[0])
1002 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1003 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1006 [(set_attr "type" "vecload,load")
1007 (set_attr "length" "8,8")
1008 (set_attr "isa" "<VSisa>,*")])
1010 (define_insn "*vsx_le_perm_store_<mode>"
1011 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1012 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
1013 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1014 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1018 [(set_attr "type" "vecstore,store")
1019 (set_attr "length" "12,8")
1020 (set_attr "isa" "<VSisa>,*")])
1023 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1024 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1025 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
1026 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1029 rtx tmp = (can_create_pseudo_p ()
1030 ? gen_reg_rtx_and_attrs (operands[0])
1032 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1033 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1037 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1038 ;; GPR registers on a little endian system.
1040 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1041 (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1043 (set (match_operand:VSX_TI 2 "int_reg_operand")
1044 (rotate:VSX_TI (match_dup 0)
1046 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1047 && (rtx_equal_p (operands[0], operands[2])
1048 || peep2_reg_dead_p (2, operands[0]))"
1049 [(set (match_dup 2) (match_dup 1))])
1052 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1053 (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1055 (set (match_operand:VSX_TI 2 "memory_operand")
1056 (rotate:VSX_TI (match_dup 0)
1058 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1059 && peep2_reg_dead_p (2, operands[0])"
1060 [(set (match_dup 2) (match_dup 1))])
1062 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1063 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
1064 ;; floating point are handled by the more generic swap elimination pass.
1066 [(set (match_operand:TI 0 "vsx_register_operand")
1067 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1069 (set (match_operand:TI 2 "vsx_register_operand")
1070 (rotate:TI (match_dup 0)
1072 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1073 && (rtx_equal_p (operands[0], operands[2])
1074 || peep2_reg_dead_p (2, operands[0]))"
1075 [(set (match_dup 2) (match_dup 1))])
1077 ;; The post-reload split requires that we re-permute the source
1078 ;; register in case it is still live.
1080 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1081 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1082 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
1083 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1086 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1087 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1088 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1092 ;; Vector constants that can be generated with XXSPLTIB, which was added in ISA
1093 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1094 (define_insn "xxspltib_v16qi"
1095 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1096 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1099 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1100 return "xxspltib %x0,%2";
1102 [(set_attr "type" "vecperm")])
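;; Illustrative use (an assumption, not taken from this file): because the
;; code above masks the constant with 0xff, a splat of -5 comes out as
;;   xxspltib %xN,251
;; for C source along the lines of
;;   vector signed char v = vec_splats ((signed char) -5);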
1104 (define_insn "xxspltib_<mode>_nosplit"
1105 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1106 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1109 rtx op1 = operands[1];
1113 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1117 operands[2] = GEN_INT (value & 0xff);
1118 return "xxspltib %x0,%2";
1120 [(set_attr "type" "vecperm")])
1122 (define_insn_and_split "*xxspltib_<mode>_split"
1123 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1124 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1132 rtx op0 = operands[0];
1133 rtx op1 = operands[1];
1134 rtx tmp = ((can_create_pseudo_p ())
1135 ? gen_reg_rtx (V16QImode)
1136 : gen_lowpart (V16QImode, op0));
1138 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1142 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1144 if (<MODE>mode == V2DImode)
1145 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1147 else if (<MODE>mode == V4SImode)
1148 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1150 else if (<MODE>mode == V8HImode)
1151 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1158 [(set_attr "type" "vecperm")
1159 (set_attr "length" "8")])
1162 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB
1163 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1164 ;; all 1's, since the machine does not have to wait for the previous
1165 ;; instruction using the register being set (such as a store waiting on a slow
1166 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1168 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1169 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1170 ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1171 (define_insn "vsx_mov<mode>_64bit"
1172 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1173 "=ZwO, wa, wa, r, we, ?wQ,
1174 ?&r, ??r, ??Y, <??r>, wa, v,
1175 ?wa, v, <??r>, wZ, v")
1177 (match_operand:VSX_M 1 "input_operand"
1178 "wa, ZwO, wa, we, r, r,
1179 wQ, Y, r, r, wE, jwM,
1180 ?jwM, W, <nW>, v, wZ"))]
1182 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1183 && (register_operand (operands[0], <MODE>mode)
1184 || register_operand (operands[1], <MODE>mode))"
1186 return rs6000_output_move_128bit (operands);
1189 "vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
1190 store, load, store, *, vecsimple, vecsimple,
1191 vecsimple, *, *, vecstore, vecload")
1192 (set_attr "num_insns"
1196 (set_attr "max_prefixed_insns"
1205 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1207 <VSisa>, *, *, *, *")])
1209 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1210 ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
1211 ;; LVX (VMX) STVX (VMX)
1212 (define_insn "*vsx_mov<mode>_32bit"
1213 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1214 "=ZwO, wa, wa, ??r, ??Y, <??r>,
1215 wa, v, ?wa, v, <??r>,
1218 (match_operand:VSX_M 1 "input_operand"
1219 "wa, ZwO, wa, Y, r, r,
1220 wE, jwM, ?jwM, W, <nW>,
1223 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1224 && (register_operand (operands[0], <MODE>mode)
1225 || register_operand (operands[1], <MODE>mode))"
1227 return rs6000_output_move_128bit (operands);
1230 "vecstore, vecload, vecsimple, load, store, *,
1231 vecsimple, vecsimple, vecsimple, *, *,
1234 "*, *, *, 16, 16, 16,
1238 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1239 p9v, *, <VSisa>, *, *,
1242 ;; Explicit load/store expanders for the builtin functions
1243 (define_expand "vsx_load_<mode>"
1244 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1245 (match_operand:VSX_M 1 "memory_operand"))]
1246 "VECTOR_MEM_VSX_P (<MODE>mode)"
1248 /* Expand to swaps if needed, prior to swap optimization. */
1249 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1250 && !altivec_indexed_or_indirect_operand(operands[1], <MODE>mode))
1252 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1257 (define_expand "vsx_store_<mode>"
1258 [(set (match_operand:VSX_M 0 "memory_operand")
1259 (match_operand:VSX_M 1 "vsx_register_operand"))]
1260 "VECTOR_MEM_VSX_P (<MODE>mode)"
1262 /* Expand to swaps if needed, prior to swap optimization. */
1263 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1264 && !altivec_indexed_or_indirect_operand(operands[0], <MODE>mode))
1266 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1271 ;; Load rightmost element from load_data
1272 ;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
1273 (define_insn "vsx_lxvr<wd>x"
1274 [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
1275 (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
1278 [(set_attr "type" "vecload")])
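;; Illustrative note: the zero_extend:TI above reflects that these loads
;; (e.g. lxvrwx) place the element in the low-order bytes of the vector
;; register and clear the remaining bytes, so the 128-bit result is the
;; memory value zero-extended to TImode.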
1280 ;; Store rightmost element into store_data
1281 ;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
1282 (define_insn "vsx_stxvr<wd>x"
1283 [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
1284 (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
1286 "stxvr<wd>x %x1,%y0"
1287 [(set_attr "type" "vecstore")])
1289 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1290 ;; when you really want their element-reversing behavior.
1291 (define_insn "vsx_ld_elemrev_v2di"
1292 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1294 (match_operand:V2DI 1 "memory_operand" "Z")
1295 (parallel [(const_int 1) (const_int 0)])))]
1296 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1298 [(set_attr "type" "vecload")])
1300 (define_insn "vsx_ld_elemrev_v1ti"
1301 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1303 (match_operand:V1TI 1 "memory_operand" "Z")
1304 (parallel [(const_int 0)])))]
1305 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1307 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1309 [(set_attr "type" "vecload")])
1311 (define_insn "vsx_ld_elemrev_v2df"
1312 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1314 (match_operand:V2DF 1 "memory_operand" "Z")
1315 (parallel [(const_int 1) (const_int 0)])))]
1316 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1318 [(set_attr "type" "vecload")])
1320 (define_insn "vsx_ld_elemrev_v4si"
1321 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1323 (match_operand:V4SI 1 "memory_operand" "Z")
1324 (parallel [(const_int 3) (const_int 2)
1325 (const_int 1) (const_int 0)])))]
1326 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1328 [(set_attr "type" "vecload")])
1330 (define_insn "vsx_ld_elemrev_v4sf"
1331 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1333 (match_operand:V4SF 1 "memory_operand" "Z")
1334 (parallel [(const_int 3) (const_int 2)
1335 (const_int 1) (const_int 0)])))]
1336 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1338 [(set_attr "type" "vecload")])
1340 (define_expand "vsx_ld_elemrev_v8hi"
1341 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1343 (match_operand:V8HI 1 "memory_operand" "Z")
1344 (parallel [(const_int 7) (const_int 6)
1345 (const_int 5) (const_int 4)
1346 (const_int 3) (const_int 2)
1347 (const_int 1) (const_int 0)])))]
1348 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1350 if (!TARGET_P9_VECTOR)
1352 rtx tmp = gen_reg_rtx (V4SImode);
1353 rtx subreg, subreg2, perm[16], pcv;
1354 /* 2 is leftmost element in register */
1355 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1358 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1359 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1360 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1362 for (i = 0; i < 16; ++i)
1363 perm[i] = GEN_INT (reorder[i]);
1365 pcv = force_reg (V16QImode,
1366 gen_rtx_CONST_VECTOR (V16QImode,
1367 gen_rtvec_v (16, perm)));
1368 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1374 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1375 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1377 (match_operand:V8HI 1 "memory_operand" "Z")
1378 (parallel [(const_int 7) (const_int 6)
1379 (const_int 5) (const_int 4)
1380 (const_int 3) (const_int 2)
1381 (const_int 1) (const_int 0)])))]
1382 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1384 [(set_attr "type" "vecload")])
1386 (define_expand "vsx_ld_elemrev_v16qi"
1387 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1389 (match_operand:V16QI 1 "memory_operand" "Z")
1390 (parallel [(const_int 15) (const_int 14)
1391 (const_int 13) (const_int 12)
1392 (const_int 11) (const_int 10)
1393 (const_int 9) (const_int 8)
1394 (const_int 7) (const_int 6)
1395 (const_int 5) (const_int 4)
1396 (const_int 3) (const_int 2)
1397 (const_int 1) (const_int 0)])))]
1398 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1400 if (!TARGET_P9_VECTOR)
1402 rtx tmp = gen_reg_rtx (V4SImode);
1403 rtx subreg, subreg2, perm[16], pcv;
1404 /* 3 is leftmost element in register */
1405 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1408 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1409 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1410 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1412 for (i = 0; i < 16; ++i)
1413 perm[i] = GEN_INT (reorder[i]);
1415 pcv = force_reg (V16QImode,
1416 gen_rtx_CONST_VECTOR (V16QImode,
1417 gen_rtvec_v (16, perm)));
1418 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1424 (define_insn "vsx_ld_elemrev_v16qi_internal"
1425 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1427 (match_operand:V16QI 1 "memory_operand" "Z")
1428 (parallel [(const_int 15) (const_int 14)
1429 (const_int 13) (const_int 12)
1430 (const_int 11) (const_int 10)
1431 (const_int 9) (const_int 8)
1432 (const_int 7) (const_int 6)
1433 (const_int 5) (const_int 4)
1434 (const_int 3) (const_int 2)
1435 (const_int 1) (const_int 0)])))]
1436 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1438 [(set_attr "type" "vecload")])
1440 (define_insn "vsx_st_elemrev_v1ti"
1441 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1443 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1444 (parallel [(const_int 0)])))
1445 (clobber (match_dup 1))]
1446 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1448 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1450 [(set_attr "type" "vecstore")])
1452 (define_insn "vsx_st_elemrev_v2df"
1453 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1455 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1456 (parallel [(const_int 1) (const_int 0)])))]
1457 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1459 [(set_attr "type" "vecstore")])
1461 (define_insn "vsx_st_elemrev_v2di"
1462 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1464 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1465 (parallel [(const_int 1) (const_int 0)])))]
1466 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1468 [(set_attr "type" "vecstore")])
1470 (define_insn "vsx_st_elemrev_v4sf"
1471 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1473 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1474 (parallel [(const_int 3) (const_int 2)
1475 (const_int 1) (const_int 0)])))]
1476 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1478 [(set_attr "type" "vecstore")])
1480 (define_insn "vsx_st_elemrev_v4si"
1481 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1483 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1484 (parallel [(const_int 3) (const_int 2)
1485 (const_int 1) (const_int 0)])))]
1486 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1488 [(set_attr "type" "vecstore")])
1490 (define_expand "vsx_st_elemrev_v8hi"
1491 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1493 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1494 (parallel [(const_int 7) (const_int 6)
1495 (const_int 5) (const_int 4)
1496 (const_int 3) (const_int 2)
1497 (const_int 1) (const_int 0)])))]
1498 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1500 if (!TARGET_P9_VECTOR)
1502 rtx mem_subreg, subreg, perm[16], pcv;
1503 rtx tmp = gen_reg_rtx (V8HImode);
1504 /* 2 is leftmost element in register */
1505 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1508 for (i = 0; i < 16; ++i)
1509 perm[i] = GEN_INT (reorder[i]);
1511 pcv = force_reg (V16QImode,
1512 gen_rtx_CONST_VECTOR (V16QImode,
1513 gen_rtvec_v (16, perm)));
1514 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1516 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1517 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1518 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1523 (define_insn "*vsx_st_elemrev_v2di_internal"
1524 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1526 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1527 (parallel [(const_int 1) (const_int 0)])))]
1528 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1530 [(set_attr "type" "vecstore")])
1532 (define_insn "*vsx_st_elemrev_v8hi_internal"
1533 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1535 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1536 (parallel [(const_int 7) (const_int 6)
1537 (const_int 5) (const_int 4)
1538 (const_int 3) (const_int 2)
1539 (const_int 1) (const_int 0)])))]
1540 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1542 [(set_attr "type" "vecstore")])
1544 (define_expand "vsx_st_elemrev_v16qi"
1545 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1547 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1548 (parallel [(const_int 15) (const_int 14)
1549 (const_int 13) (const_int 12)
1550 (const_int 11) (const_int 10)
1551 (const_int 9) (const_int 8)
1552 (const_int 7) (const_int 6)
1553 (const_int 5) (const_int 4)
1554 (const_int 3) (const_int 2)
1555 (const_int 1) (const_int 0)])))]
1556 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1558 if (!TARGET_P9_VECTOR)
1560 rtx mem_subreg, subreg, perm[16], pcv;
1561 rtx tmp = gen_reg_rtx (V16QImode);
1562 /* 3 is leftmost element in register */
1563 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1566 for (i = 0; i < 16; ++i)
1567 perm[i] = GEN_INT (reorder[i]);
1569 pcv = force_reg (V16QImode,
1570 gen_rtx_CONST_VECTOR (V16QImode,
1571 gen_rtvec_v (16, perm)));
1572 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1574 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1575 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1576 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1581 (define_insn "*vsx_st_elemrev_v16qi_internal"
1582 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1584 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1585 (parallel [(const_int 15) (const_int 14)
1586 (const_int 13) (const_int 12)
1587 (const_int 11) (const_int 10)
1588 (const_int 9) (const_int 8)
1589 (const_int 7) (const_int 6)
1590 (const_int 5) (const_int 4)
1591 (const_int 3) (const_int 2)
1592 (const_int 1) (const_int 0)])))]
1593 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1595 [(set_attr "type" "vecstore")])
1598 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1599 ;; instructions are now combined with the insn for the traditional floating
1601 (define_insn "*vsx_add<mode>3"
1602 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1603 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1604 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1605 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1606 "xvadd<sd>p %x0,%x1,%x2"
1607 [(set_attr "type" "<VStype_simple>")])
1609 (define_insn "*vsx_sub<mode>3"
1610 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1611 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1612 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1613 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1614 "xvsub<sd>p %x0,%x1,%x2"
1615 [(set_attr "type" "<VStype_simple>")])
1617 (define_insn "*vsx_mul<mode>3"
1618 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1619 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1620 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1621 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1622 "xvmul<sd>p %x0,%x1,%x2"
1623 [(set_attr "type" "<VStype_simple>")])
1625 ;; Emulate vector with scalar for vec_mul in V2DImode
1626 (define_insn_and_split "vsx_mul_v2di"
1627 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1628 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1629 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1631 "VECTOR_MEM_VSX_P (V2DImode)"
1633 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1636 rtx op0 = operands[0];
1637 rtx op1 = operands[1];
1638 rtx op2 = operands[2];
1641 emit_insn (gen_mulv2di3 (op0, op1, op2));
1645 rtx op3 = gen_reg_rtx (DImode);
1646 rtx op4 = gen_reg_rtx (DImode);
1647 rtx op5 = gen_reg_rtx (DImode);
1648 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1649 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1650 if (TARGET_POWERPC64)
1651 emit_insn (gen_muldi3 (op5, op3, op4));
1654 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1655 emit_move_insn (op5, ret);
1657 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1658 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1659 if (TARGET_POWERPC64)
1660 emit_insn (gen_muldi3 (op3, op3, op4));
1663 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1664 emit_move_insn (op3, ret);
1666 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1670 [(set_attr "type" "mul")])
1672 (define_insn "*vsx_div<mode>3"
1673 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1674 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1675 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1676 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1677 "xvdiv<sd>p %x0,%x1,%x2"
1678 [(set_attr "type" "<VStype_div>")])
1680 ;; Emulate vector with scalar for vec_div in V2DImode
1681 (define_insn_and_split "vsx_div_v2di"
1682 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1683 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1684 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1686 "VECTOR_MEM_VSX_P (V2DImode)"
1688 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1691 rtx op0 = operands[0];
1692 rtx op1 = operands[1];
1693 rtx op2 = operands[2];
1694 rtx op3 = gen_reg_rtx (DImode);
1695 rtx op4 = gen_reg_rtx (DImode);
1696 rtx op5 = gen_reg_rtx (DImode);
1697 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1698 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1699 if (TARGET_POWERPC64)
1700 emit_insn (gen_divdi3 (op5, op3, op4));
1703 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1704 rtx target = emit_library_call_value (libfunc,
1705 op5, LCT_NORMAL, DImode,
1708 emit_move_insn (op5, target);
1710 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1711 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1712 if (TARGET_POWERPC64)
1713 emit_insn (gen_divdi3 (op3, op3, op4));
1716 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1717 rtx target = emit_library_call_value (libfunc,
1718 op3, LCT_NORMAL, DImode,
1721 emit_move_insn (op3, target);
1723 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1726 [(set_attr "type" "div")])
1728 (define_insn_and_split "vsx_udiv_v2di"
1729 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1730 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1731 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1733 "VECTOR_MEM_VSX_P (V2DImode)"
1735 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1738 rtx op0 = operands[0];
1739 rtx op1 = operands[1];
1740 rtx op2 = operands[2];
1743 emit_insn (gen_udivv2di3 (op0, op1, op2));
1746 rtx op3 = gen_reg_rtx (DImode);
1747 rtx op4 = gen_reg_rtx (DImode);
1748 rtx op5 = gen_reg_rtx (DImode);
1750 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1751 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1753 if (TARGET_POWERPC64)
1754 emit_insn (gen_udivdi3 (op5, op3, op4));
1757 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1758 rtx target = emit_library_call_value (libfunc,
1759 op5, LCT_NORMAL, DImode,
1762 emit_move_insn (op5, target);
1764 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1765 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1767 if (TARGET_POWERPC64)
1768 emit_insn (gen_udivdi3 (op3, op3, op4));
1771 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1772 rtx target = emit_library_call_value (libfunc,
1773 op3, LCT_NORMAL, DImode,
1776 emit_move_insn (op3, target);
1778 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1782 [(set_attr "type" "div")])
1784 ;; *tdiv* instruction returning the FG flag
1785 (define_expand "vsx_tdiv<mode>3_fg"
1787 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1788 (match_operand:VSX_B 2 "vsx_register_operand")]
1790 (set (match_operand:SI 0 "gpc_reg_operand")
1791 (gt:SI (match_dup 3)
1793 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1795 operands[3] = gen_reg_rtx (CCFPmode);
1798 ;; *tdiv* instruction returning the FE flag
1799 (define_expand "vsx_tdiv<mode>3_fe"
1801 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1802 (match_operand:VSX_B 2 "vsx_register_operand")]
1804 (set (match_operand:SI 0 "gpc_reg_operand")
1805 (eq:SI (match_dup 3)
1807 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1809 operands[3] = gen_reg_rtx (CCFPmode);
1812 (define_insn "*vsx_tdiv<mode>3_internal"
1813 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1814 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1815 (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1817 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1818 "x<VSv>tdiv<sd>p %0,%x1,%x2"
1819 [(set_attr "type" "<VStype_simple>")])
1821 (define_insn "vsx_fre<mode>2"
1822 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1823 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1825 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1827 [(set_attr "type" "<VStype_simple>")])
1829 (define_insn "*vsx_neg<mode>2"
1830 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1831 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1832 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1833 "xvneg<sd>p %x0,%x1"
1834 [(set_attr "type" "<VStype_simple>")])
1836 (define_insn "*vsx_abs<mode>2"
1837 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1838 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1839 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1840 "xvabs<sd>p %x0,%x1"
1841 [(set_attr "type" "<VStype_simple>")])
1843 (define_insn "vsx_nabs<mode>2"
1844 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1847 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1848 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1849 "xvnabs<sd>p %x0,%x1"
1850 [(set_attr "type" "<VStype_simple>")])
1852 (define_insn "vsx_smax<mode>3"
1853 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1854 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1855 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1856 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1857 "xvmax<sd>p %x0,%x1,%x2"
1858 [(set_attr "type" "<VStype_simple>")])
1860 (define_insn "*vsx_smin<mode>3"
1861 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1862 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1863 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1864 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1865 "xvmin<sd>p %x0,%x1,%x2"
1866 [(set_attr "type" "<VStype_simple>")])
1868 (define_insn "*vsx_sqrt<mode>2"
1869 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1870 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1871 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1872 "xvsqrt<sd>p %x0,%x1"
1873 [(set_attr "type" "<sd>sqrt")])
1875 (define_insn "*vsx_rsqrte<mode>2"
1876 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1877 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1879 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1880 "xvrsqrte<sd>p %x0,%x1"
1881 [(set_attr "type" "<VStype_simple>")])
1883 ;; *tsqrt* returning the fg flag
1884 (define_expand "vsx_tsqrt<mode>2_fg"
1886 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1888 (set (match_operand:SI 0 "gpc_reg_operand")
1889 (gt:SI (match_dup 2)
1891 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1893 operands[2] = gen_reg_rtx (CCFPmode);
1896 ;; *tsqrt* returning the fe flag
1897 (define_expand "vsx_tsqrt<mode>2_fe"
1899 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1901 (set (match_operand:SI 0 "gpc_reg_operand")
1902 (eq:SI (match_dup 2)
1904 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1906 operands[2] = gen_reg_rtx (CCFPmode);
1909 (define_insn "*vsx_tsqrt<mode>2_internal"
1910 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1911 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1913 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1914 "x<VSv>tsqrt<sd>p %0,%x1"
1915 [(set_attr "type" "<VStype_simple>")])
1917 ;; Fused vector multiply/add instructions. Support the classical Altivec
1918 ;; versions of fma, which allow the target to be a separate register from the
1919 ;; 3 inputs. Under VSX, the target must be the addend or the first multiply input.
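;; Worked example (illustrative): in the "a" form the target is the
;; accumulator, so xvmaddadp x0,x1,x2 computes x0 = x1*x2 + x0, while in the
;; "m" form the target is a multiply input, so xvmaddmdp x0,x1,x3 computes
;; x0 = x1*x0 + x3; the "0" constraints below tie operand 3 or operand 2 to
;; the output accordingly.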
1922 (define_insn "*vsx_fmav4sf4"
1923 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1925 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1926 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1927 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1928 "VECTOR_UNIT_VSX_P (V4SFmode)"
1930 xvmaddasp %x0,%x1,%x2
1931 xvmaddmsp %x0,%x1,%x3
1932 vmaddfp %0,%1,%2,%3"
1933 [(set_attr "type" "vecfloat")])
1935 (define_insn "*vsx_fmav2df4"
1936 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1938 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1939 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1940 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1941 "VECTOR_UNIT_VSX_P (V2DFmode)"
1943 xvmaddadp %x0,%x1,%x2
1944 xvmaddmdp %x0,%x1,%x3"
1945 [(set_attr "type" "vecdouble")])
1947 (define_insn "*vsx_fms<mode>4"
1948 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1950 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1951 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1953 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1954 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1956 xvmsuba<sd>p %x0,%x1,%x2
1957 xvmsubm<sd>p %x0,%x1,%x3"
1958 [(set_attr "type" "<VStype_mul>")])
1960 (define_insn "*vsx_nfma<mode>4"
1961 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1964 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1965 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1966 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1967 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1969 xvnmadda<sd>p %x0,%x1,%x2
1970 xvnmaddm<sd>p %x0,%x1,%x3"
1971 [(set_attr "type" "<VStype_mul>")])
1973 (define_insn "*vsx_nfmsv4sf4"
1974 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1977 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1978 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1980 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1981 "VECTOR_UNIT_VSX_P (V4SFmode)"
1983 xvnmsubasp %x0,%x1,%x2
1984 xvnmsubmsp %x0,%x1,%x3
1985 vnmsubfp %0,%1,%2,%3"
1986 [(set_attr "type" "vecfloat")])
1988 (define_insn "*vsx_nfmsv2df4"
1989 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1992 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1993 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1995 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1996 "VECTOR_UNIT_VSX_P (V2DFmode)"
1998 xvnmsubadp %x0,%x1,%x2
1999 xvnmsubmdp %x0,%x1,%x3"
2000 [(set_attr "type" "vecdouble")])
2002 ;; Vector conditional expressions (no scalar version for these instructions)
2003 (define_insn "vsx_eq<mode>"
2004 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2005 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2006 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2007 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2008 "xvcmpeq<sd>p %x0,%x1,%x2"
2009 [(set_attr "type" "<VStype_simple>")])
2011 (define_insn "vsx_gt<mode>"
2012 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2013 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2014 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2015 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2016 "xvcmpgt<sd>p %x0,%x1,%x2"
2017 [(set_attr "type" "<VStype_simple>")])
2019 (define_insn "*vsx_ge<mode>"
2020 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2021 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2022 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2023 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2024 "xvcmpge<sd>p %x0,%x1,%x2"
2025 [(set_attr "type" "<VStype_simple>")])
2027 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2028 ;; indicate a combined status
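;; A hedged usage sketch (not part of this file): predicates such as
;; vec_all_eq from <altivec.h> are built on these dot-form compares, reading
;; the combined status back out of CR6.  For example:
;;
;;   #include <altivec.h>
;;   int all_equal (vector float a, vector float b)
;;   {
;;     return vec_all_eq (a, b);   /* xvcmpeqsp. followed by a CR6 test.  */
;;   }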
2029 (define_insn "*vsx_eq_<mode>_p"
2030 [(set (reg:CC CR6_REGNO)
2032 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2033 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2035 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2036 (eq:VSX_F (match_dup 1)
2038 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2039 "xvcmpeq<sd>p. %x0,%x1,%x2"
2040 [(set_attr "type" "<VStype_simple>")])
2042 (define_insn "*vsx_gt_<mode>_p"
2043 [(set (reg:CC CR6_REGNO)
2045 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2046 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2048 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2049 (gt:VSX_F (match_dup 1)
2051 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2052 "xvcmpgt<sd>p. %x0,%x1,%x2"
2053 [(set_attr "type" "<VStype_simple>")])
2056 ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
2057 ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
2058 (define_insn "*xvtlsbb_internal"
2059 [(set (match_operand:CC 0 "cc_reg_operand" "=y")
2060 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2064 [(set_attr "type" "logical")])
2066 ;; Vector Test Least Significant Bit by Byte
2067 ;; for the implementation of the builtin
2068 ;; __builtin_vec_test_lsbb_all_ones
2069 ;; int vec_test_lsbb_all_ones (vector unsigned char);
2071 ;; __builtin_vec_test_lsbb_all_zeros
2072 ;; int vec_test_lsbb_all_zeros (vector unsigned char);
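;; A hedged usage sketch of the builtins named above (assumes a Power10
;; target, -mcpu=power10):
;;
;;   #include <altivec.h>
;;   int lsbb_all_ones (vector unsigned char v)
;;   {
;;     /* Non-zero when bit 7 of every byte element of V is 1.  */
;;     return vec_test_lsbb_all_ones (v);
;;   }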
2073 (define_expand "xvtlsbbo"
2075 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2077 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2078 (lt:SI (match_dup 2) (const_int 0)))]
2081 operands[2] = gen_reg_rtx (CCmode);
2083 (define_expand "xvtlsbbz"
2085 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2087 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2088 (eq:SI (match_dup 2) (const_int 0)))]
2091 operands[2] = gen_reg_rtx (CCmode);
2094 (define_insn "*vsx_ge_<mode>_p"
2095 [(set (reg:CC CR6_REGNO)
2097 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2098 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2100 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2101 (ge:VSX_F (match_dup 1)
2103 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2104 "xvcmpge<sd>p. %x0,%x1,%x2"
2105 [(set_attr "type" "<VStype_simple>")])
2108 (define_insn "*vsx_xxsel<mode>"
2109 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2111 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2112 (match_operand:VSX_L 4 "zero_constant" ""))
2113 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2114 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2115 "VECTOR_MEM_VSX_P (<MODE>mode)"
2116 "xxsel %x0,%x3,%x2,%x1"
2117 [(set_attr "type" "vecmove")
2118 (set_attr "isa" "<VSisa>")])
2120 (define_insn "*vsx_xxsel<mode>_uns"
2121 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2123 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2124 (match_operand:VSX_L 4 "zero_constant" ""))
2125 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2126 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2127 "VECTOR_MEM_VSX_P (<MODE>mode)"
2128 "xxsel %x0,%x3,%x2,%x1"
2129 [(set_attr "type" "vecmove")
2130 (set_attr "isa" "<VSisa>")])
2133 (define_insn "vsx_copysign<mode>3"
2134 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2136 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2137 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2139 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2140 "xvcpsgn<sd>p %x0,%x2,%x1"
2141 [(set_attr "type" "<VStype_simple>")])
2143 ;; For the conversions, limit the register class for the integer value to be
2144 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2145 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2146 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2147 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2148 ;; in allowing virtual registers.
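;; As a hedged source-level sketch, this is roughly what a signed
;; V4SI -> V4SF conversion turns into on a VSX target (the typedefs below
;; use the GCC generic vector extension and are not part of this file):
;;
;;   typedef int   v4si __attribute__ ((vector_size (16)));
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf int_to_float (v4si x)
;;   {
;;     return __builtin_convertvector (x, v4sf);   /* xvcvsxwsp */
;;   }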
2149 (define_insn "vsx_float<VSi><mode>2"
2150 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2151 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2152 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2153 "xvcvsx<VSc><sd>p %x0,%x1"
2154 [(set_attr "type" "<VStype_simple>")])
2156 (define_insn "vsx_floatuns<VSi><mode>2"
2157 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2158 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2159 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2160 "xvcvux<VSc><sd>p %x0,%x1"
2161 [(set_attr "type" "<VStype_simple>")])
2163 (define_insn "vsx_fix_trunc<mode><VSi>2"
2164 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2165 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2166 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2167 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2168 [(set_attr "type" "<VStype_simple>")])
2170 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2171 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2172 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2173 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2174 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2175 [(set_attr "type" "<VStype_simple>")])
2177 ;; Math rounding functions
2178 (define_insn "vsx_x<VSv>r<sd>pi"
2179 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2180 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2181 UNSPEC_VSX_ROUND_I))]
2182 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2183 "x<VSv>r<sd>pi %x0,%x1"
2184 [(set_attr "type" "<VStype_simple>")])
2186 (define_insn "vsx_x<VSv>r<sd>pic"
2187 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2188 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2189 UNSPEC_VSX_ROUND_IC))]
2190 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2191 "x<VSv>r<sd>pic %x0,%x1"
2192 [(set_attr "type" "<VStype_simple>")])
2194 (define_insn "vsx_btrunc<mode>2"
2195 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2196 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2197 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2198 "xvr<sd>piz %x0,%x1"
2199 [(set_attr "type" "<VStype_simple>")])
2201 (define_insn "*vsx_b2trunc<mode>2"
2202 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2203 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2205 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2206 "x<VSv>r<sd>piz %x0,%x1"
2207 [(set_attr "type" "<VStype_simple>")])
2209 (define_insn "vsx_floor<mode>2"
2210 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2211 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2213 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2214 "xvr<sd>pim %x0,%x1"
2215 [(set_attr "type" "<VStype_simple>")])
2217 (define_insn "vsx_ceil<mode>2"
2218 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2219 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2221 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2222 "xvr<sd>pip %x0,%x1"
2223 [(set_attr "type" "<VStype_simple>")])
2226 ;; VSX convert to/from double vector
2228 ;; Convert between single and double precision
2229 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2230 ;; scalar single precision instructions internally use the double format.
2231 ;; Prefer the altivec registers, since we likely will need to do a vperm
2232 (define_insn "vsx_xscvdpsp"
2233 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2234 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2235 UNSPEC_VSX_CVSPDP))]
2236 "VECTOR_UNIT_VSX_P (DFmode)"
2238 [(set_attr "type" "fp")])
2240 (define_insn "vsx_xvcvspdp_be"
2241 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2243 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2244 (parallel [(const_int 0) (const_int 2)]))))]
2245 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2247 [(set_attr "type" "vecdouble")])
2249 (define_insn "vsx_xvcvspdp_le"
2250 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2252 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2253 (parallel [(const_int 1) (const_int 3)]))))]
2254 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2256 [(set_attr "type" "vecdouble")])
2258 (define_expand "vsx_xvcvspdp"
2259 [(match_operand:V2DF 0 "vsx_register_operand")
2260 (match_operand:V4SF 1 "vsx_register_operand")]
2261 "VECTOR_UNIT_VSX_P (V4SFmode)"
2263 if (BYTES_BIG_ENDIAN)
2264 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2266 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2270 (define_insn "vsx_xvcvdpsp"
2271 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2272 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2273 UNSPEC_VSX_CVSPDP))]
2274 "VECTOR_UNIT_VSX_P (V2DFmode)"
2276 [(set_attr "type" "vecdouble")])
2278 ;; xscvspdp, represent the scalar SF type as V4SF
2279 (define_insn "vsx_xscvspdp"
2280 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2281 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2282 UNSPEC_VSX_CVSPDP))]
2283 "VECTOR_UNIT_VSX_P (V4SFmode)"
2285 [(set_attr "type" "fp")])
2287 ;; Same as vsx_xscvspdp, but use SF as the type
2288 (define_insn "vsx_xscvspdp_scalar2"
2289 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2290 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2291 UNSPEC_VSX_CVSPDP))]
2292 "VECTOR_UNIT_VSX_P (V4SFmode)"
2294 [(set_attr "type" "fp")])
2296 ;; Generate xvcvhpsp instruction
2297 (define_insn "vsx_xvcvhpsp"
2298 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2299 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2300 UNSPEC_VSX_CVHPSP))]
2303 [(set_attr "type" "vecfloat")])
2305 ;; Generate xvcvsphp
2306 (define_insn "vsx_xvcvsphp"
2307 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2308 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2309 UNSPEC_VSX_XVCVSPHP))]
2312 [(set_attr "type" "vecfloat")])
2314 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2315 ;; format of scalars is actually DF.
2316 (define_insn "vsx_xscvdpsp_scalar"
2317 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2318 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2319 UNSPEC_VSX_CVSPDP))]
2320 "VECTOR_UNIT_VSX_P (V4SFmode)"
2322 [(set_attr "type" "fp")])
2324 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
2325 (define_insn "vsx_xscvdpspn"
2326 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2327 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2328 UNSPEC_VSX_CVDPSPN))]
2331 [(set_attr "type" "fp")])
2333 (define_insn "vsx_xscvspdpn"
2334 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2335 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2336 UNSPEC_VSX_CVSPDPN))]
2339 [(set_attr "type" "fp")])
2341 (define_insn "vsx_xscvdpspn_scalar"
2342 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2343 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2344 UNSPEC_VSX_CVDPSPN))]
2347 [(set_attr "type" "fp")])
2349 ;; Used by direct move to move a SFmode value from GPR to VSX register
2350 (define_insn "vsx_xscvspdpn_directmove"
2351 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2352 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2353 UNSPEC_VSX_CVSPDPN))]
2356 [(set_attr "type" "fp")])
2358 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
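;; A hedged usage sketch of the convert-and-scale path (assuming the
;; <altivec.h> vec_ctf overload for 64-bit elements):
;;
;;   #include <altivec.h>
;;   vector double to_scaled_double (vector signed long long x)
;;   {
;;     /* Roughly xvcvsxddp followed by a multiply by 2**-4.  */
;;     return vec_ctf (x, 4);
;;   }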
2360 (define_insn "vsx_xvcv<su>xwsp"
2361 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2362 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2363 "VECTOR_UNIT_VSX_P (V4SFmode)"
2364 "xvcv<su>xwsp %x0,%x1"
2365 [(set_attr "type" "vecfloat")])
2367 (define_insn "vsx_xvcv<su>xddp"
2368 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2369 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2370 "VECTOR_UNIT_VSX_P (V2DFmode)"
2371 "xvcv<su>xddp %x0,%x1"
2372 [(set_attr "type" "vecdouble")])
2374 (define_insn "vsx_xvcvsp<su>xws"
2375 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2376 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2377 "VECTOR_UNIT_VSX_P (V4SFmode)"
2378 "xvcvsp<su>xws %x0,%x1"
2379 [(set_attr "type" "vecfloat")])
2381 (define_insn "vsx_xvcvdp<su>xds"
2382 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2383 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2384 "VECTOR_UNIT_VSX_P (V2DFmode)"
2385 "xvcvdp<su>xds %x0,%x1"
2386 [(set_attr "type" "vecdouble")])
2388 (define_expand "vsx_xvcvsxddp_scale"
2389 [(match_operand:V2DF 0 "vsx_register_operand")
2390 (match_operand:V2DI 1 "vsx_register_operand")
2391 (match_operand:QI 2 "immediate_operand")]
2392 "VECTOR_UNIT_VSX_P (V2DFmode)"
2394 rtx op0 = operands[0];
2395 rtx op1 = operands[1];
2396 int scale = INTVAL(operands[2]);
2397 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2399 rs6000_scale_v2df (op0, op0, -scale);
2403 (define_expand "vsx_xvcvuxddp_scale"
2404 [(match_operand:V2DF 0 "vsx_register_operand")
2405 (match_operand:V2DI 1 "vsx_register_operand")
2406 (match_operand:QI 2 "immediate_operand")]
2407 "VECTOR_UNIT_VSX_P (V2DFmode)"
2409 rtx op0 = operands[0];
2410 rtx op1 = operands[1];
2411 int scale = INTVAL(operands[2]);
2412 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2414 rs6000_scale_v2df (op0, op0, -scale);
2418 (define_expand "vsx_xvcvdpsxds_scale"
2419 [(match_operand:V2DI 0 "vsx_register_operand")
2420 (match_operand:V2DF 1 "vsx_register_operand")
2421 (match_operand:QI 2 "immediate_operand")]
2422 "VECTOR_UNIT_VSX_P (V2DFmode)"
2424 rtx op0 = operands[0];
2425 rtx op1 = operands[1];
2427 int scale = INTVAL (operands[2]);
2432 tmp = gen_reg_rtx (V2DFmode);
2433 rs6000_scale_v2df (tmp, op1, scale);
2435 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2439 ;; convert vector of 64-bit floating point numbers to vector of
2440 ;; 64-bit unsigned integer
2441 (define_expand "vsx_xvcvdpuxds_scale"
2442 [(match_operand:V2DI 0 "vsx_register_operand")
2443 (match_operand:V2DF 1 "vsx_register_operand")
2444 (match_operand:QI 2 "immediate_operand")]
2445 "VECTOR_UNIT_VSX_P (V2DFmode)"
2447 rtx op0 = operands[0];
2448 rtx op1 = operands[1];
2450 int scale = INTVAL (operands[2]);
2455 tmp = gen_reg_rtx (V2DFmode);
2456 rs6000_scale_v2df (tmp, op1, scale);
2458 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2462 ;; Convert from 64-bit to 32-bit types
2463 ;; Note, favor the Altivec registers since the usual use of these instructions
2464 ;; is in vector converts and we need to use the Altivec vperm instruction.
2466 (define_insn "vsx_xvcvdpsxws"
2467 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2468 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2469 UNSPEC_VSX_CVDPSXWS))]
2470 "VECTOR_UNIT_VSX_P (V2DFmode)"
2471 "xvcvdpsxws %x0,%x1"
2472 [(set_attr "type" "vecdouble")])
2474 (define_insn "vsx_xvcvdpuxws"
2475 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2476 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2477 UNSPEC_VSX_CVDPUXWS))]
2478 "VECTOR_UNIT_VSX_P (V2DFmode)"
2479 "xvcvdpuxws %x0,%x1"
2480 [(set_attr "type" "vecdouble")])
2482 (define_insn "vsx_xvcvsxdsp"
2483 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2484 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2485 UNSPEC_VSX_CVSXDSP))]
2486 "VECTOR_UNIT_VSX_P (V2DFmode)"
2488 [(set_attr "type" "vecfloat")])
2490 (define_insn "vsx_xvcvuxdsp"
2491 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2492 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2493 UNSPEC_VSX_CVUXDSP))]
2494 "VECTOR_UNIT_VSX_P (V2DFmode)"
2496 [(set_attr "type" "vecdouble")])
2498 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2499 ;; 64-bit floating point numbers.
2500 (define_insn "vsx_xvcv<su>xwdp_be"
2501 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2503 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2504 (parallel [(const_int 0) (const_int 2)]))))]
2505 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2506 "xvcv<su>xwdp %x0,%x1"
2507 [(set_attr "type" "vecdouble")])
2509 (define_insn "vsx_xvcv<su>xwdp_le"
2510 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2512 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2513 (parallel [(const_int 1) (const_int 3)]))))]
2514 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2515 "xvcv<su>xwdp %x0,%x1"
2516 [(set_attr "type" "vecdouble")])
2518 (define_expand "vsx_xvcv<su>xwdp"
2519 [(match_operand:V2DF 0 "vsx_register_operand")
2520 (match_operand:V4SI 1 "vsx_register_operand")
2522 "VECTOR_UNIT_VSX_P (V2DFmode)"
2524 if (BYTES_BIG_ENDIAN)
2525 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2527 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2531 (define_insn "vsx_xvcvsxwdp_df"
2532 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2533 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2534 UNSPEC_VSX_CVSXWDP))]
2537 [(set_attr "type" "vecdouble")])
2539 (define_insn "vsx_xvcvuxwdp_df"
2540 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2541 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2542 UNSPEC_VSX_CVUXWDP))]
2545 [(set_attr "type" "vecdouble")])
2547 ;; Convert vector of 32-bit floating point numbers to vector of
2548 ;; 64-bit signed/unsigned integers.
2549 (define_insn "vsx_xvcvsp<su>xds_be"
2550 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2552 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2553 (parallel [(const_int 0) (const_int 2)]))))]
2554 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2555 "xvcvsp<su>xds %x0,%x1"
2556 [(set_attr "type" "vecdouble")])
2558 (define_insn "vsx_xvcvsp<su>xds_le"
2559 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2561 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2562 (parallel [(const_int 1) (const_int 3)]))))]
2563 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2564 "xvcvsp<su>xds %x0,%x1"
2565 [(set_attr "type" "vecdouble")])
2567 (define_expand "vsx_xvcvsp<su>xds"
2568 [(match_operand:V2DI 0 "vsx_register_operand")
2569 (match_operand:V4SF 1 "vsx_register_operand")
2571 "VECTOR_UNIT_VSX_P (V2DFmode)"
2573 if (BYTES_BIG_ENDIAN)
2574 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2576 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2580 ;; Generate float2 double
2581 ;; Convert two vectors of double to one vector of float.
2582 (define_expand "float2_v2df"
2583 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2584 (use (match_operand:V2DF 1 "register_operand" "wa"))
2585 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2586 "VECTOR_UNIT_VSX_P (V4SFmode)"
2588 rtx rtx_src1, rtx_src2, rtx_dst;
2590 rtx_dst = operands[0];
2591 rtx_src1 = operands[1];
2592 rtx_src2 = operands[2];
2594 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2599 ;; Convert two vectors of signed long long to one vector of float.
2600 (define_expand "float2_v2di"
2601 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2602 (use (match_operand:V2DI 1 "register_operand" "wa"))
2603 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2604 "VECTOR_UNIT_VSX_P (V4SFmode)"
2606 rtx rtx_src1, rtx_src2, rtx_dst;
2608 rtx_dst = operands[0];
2609 rtx_src1 = operands[1];
2610 rtx_src2 = operands[2];
2612 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2616 ;; Generate uns_float2
2617 ;; Convert two vectors of unsigned long long to one vector of float.
2618 (define_expand "uns_float2_v2di"
2619 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2620 (use (match_operand:V2DI 1 "register_operand" "wa"))
2621 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2622 "VECTOR_UNIT_VSX_P (V4SFmode)"
2624 rtx rtx_src1, rtx_src2, rtx_dst;
2626 rtx_dst = operands[0];
2627 rtx_src1 = operands[1];
2628 rtx_src2 = operands[2];
2630 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2635 ;; Convert a vector of double or signed long long to a vector of float.
2636 ;; (Only even words are valid, BE numbering)
2637 (define_expand "floate<mode>"
2638 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2639 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2640 "VECTOR_UNIT_VSX_P (V4SFmode)"
2642 if (BYTES_BIG_ENDIAN)
2644 /* Shift left one word to put the even words in the correct location.  */
2646 rtx rtx_val = GEN_INT (4);
2648 rtx_tmp = gen_reg_rtx (V4SFmode);
2649 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2650 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2651 rtx_tmp, rtx_tmp, rtx_val));
2654 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2659 ;; Generate uns_floate
2660 ;; convert long long unsigned to float
2661 ;; (Only even words are valid, BE numbering)
2662 (define_expand "unsfloatev2di"
2663 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2664 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2665 "VECTOR_UNIT_VSX_P (V4SFmode)"
2667 if (BYTES_BIG_ENDIAN)
2669 /* Shift left one word to put the even words in the correct location.  */
2671 rtx rtx_val = GEN_INT (4);
2673 rtx_tmp = gen_reg_rtx (V4SFmode);
2674 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2675 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2676 rtx_tmp, rtx_tmp, rtx_val));
2679 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2685 ;; Convert a vector of double or signed long long to a vector of float.
2686 ;; (Only odd words are valid, BE numbering)
2687 (define_expand "floato<mode>"
2688 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2689 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2690 "VECTOR_UNIT_VSX_P (V4SFmode)"
2692 if (BYTES_BIG_ENDIAN)
2693 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2696 /* Shift left one word to put the odd words in the correct location.  */
2698 rtx rtx_val = GEN_INT (4);
2700 rtx_tmp = gen_reg_rtx (V4SFmode);
2701 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2702 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2703 rtx_tmp, rtx_tmp, rtx_val));
2708 ;; Generate uns_floato
2709 ;; convert long long unsigned to float
2710 ;; (Only odd words are valid, BE numbering)
2711 (define_expand "unsfloatov2di"
2712 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2713 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2714 "VECTOR_UNIT_VSX_P (V4SFmode)"
2716 if (BYTES_BIG_ENDIAN)
2717 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2720 /* Shift left one word to put the odd words in the correct location.  */
2722 rtx rtx_val = GEN_INT (4);
2724 rtx_tmp = gen_reg_rtx (V4SFmode);
2725 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2726 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2727 rtx_tmp, rtx_tmp, rtx_val));
2732 ;; Generate vsigned2
2733 ;; convert two double float vectors to a vector of single precision ints
2734 (define_expand "vsigned2_v2df"
2735 [(match_operand:V4SI 0 "register_operand" "=wa")
2736 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2737 (match_operand:V2DF 2 "register_operand" "wa")]
2738 UNSPEC_VSX_VSIGNED2)]
2741 rtx rtx_src1, rtx_src2, rtx_dst;
2742 bool signed_convert=true;
2744 rtx_dst = operands[0];
2745 rtx_src1 = operands[1];
2746 rtx_src2 = operands[2];
2748 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2752 ;; Generate vsignedo_v2df
2753 ;; Convert signed double float to int, odd words only.
2754 (define_expand "vsignedo_v2df"
2755 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2756 (match_operand:V2DF 1 "register_operand" "wa"))]
2759 if (BYTES_BIG_ENDIAN)
2762 rtx rtx_val = GEN_INT (12);
2763 rtx_tmp = gen_reg_rtx (V4SImode);
2765 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2767 /* Big endian word numbering for words in operand is 0 1 2 3.
2768 take (operand[1] operand[1]) and shift left one word
2769 0 1 2 3 0 1 2 3 => 1 2 3 0
2770 Words 1 and 3 are now where they need to be for the result.  */
2772 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2776 /* Little endian word numbering for operand is 3 2 1 0.
2777 Result words 3 and 1 are where they need to be. */
2778 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2782 [(set_attr "type" "veccomplex")])
2784 ;; Generate vsignede_v2df
2785 ;; Convert signed double float to int, even words only.
2786 (define_expand "vsignede_v2df"
2787 [(set (match_operand:V4SI 0 "register_operand" "=v")
2788 (match_operand:V2DF 1 "register_operand" "v"))]
2791 if (BYTES_BIG_ENDIAN)
2792 /* Big endian word numbering for words in operand is 0 1 2 3.
2793 Result words 0 and 2 are where they need to be.  */
2794 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2799 rtx rtx_val = GEN_INT (12);
2800 rtx_tmp = gen_reg_rtx (V4SImode);
2802 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2804 /* Little endian word numbering for operand is 3 2 1 0.
2805 take (operand[1] operand[1]) and shift left three words
2806 0 1 2 3 0 1 2 3 => 3 0 1 2
2807 Words 0 and 2 are now where they need to be for the result. */
2808 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2813 [(set_attr "type" "veccomplex")])
2815 ;; Generate vunsigned2
2816 ;; convert two double float vectors to a vector of single precision
2817 ;; unsigned ints
2818 (define_expand "vunsigned2_v2df"
2819 [(match_operand:V4SI 0 "register_operand" "=v")
2820 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2821 (match_operand:V2DF 2 "register_operand" "v")]
2822 UNSPEC_VSX_VSIGNED2)]
2825 rtx rtx_src1, rtx_src2, rtx_dst;
2826 bool signed_convert=false;
2828 rtx_dst = operands[0];
2829 rtx_src1 = operands[1];
2830 rtx_src2 = operands[2];
2832 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2836 ;; Generate vunsignedo_v2df
2837 ;; Convert double float to unsigned int, odd words only.
2838 (define_expand "vunsignedo_v2df"
2839 [(set (match_operand:V4SI 0 "register_operand" "=v")
2840 (match_operand:V2DF 1 "register_operand" "v"))]
2843 if (BYTES_BIG_ENDIAN)
2846 rtx rtx_val = GEN_INT (12);
2847 rtx_tmp = gen_reg_rtx (V4SImode);
2849 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2851 /* Big endian word numbering for words in operand is 0 1 2 3.
2852 take (operand[1] operand[1]) and shift left one word
2853 0 1 2 3 0 1 2 3 => 1 2 3 0
2854 Words 1 and 3 are now where they need to be for the result.  */
2856 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2860 /* Little endian word numbering for operand is 3 2 1 0.
2861 Result words 3 and 1 are where they need to be. */
2862 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2866 [(set_attr "type" "veccomplex")])
2868 ;; Generate vunsignede_v2df
2869 ;; Convert double float to unsigned int, even words only.
2870 (define_expand "vunsignede_v2df"
2871 [(set (match_operand:V4SI 0 "register_operand" "=v")
2872 (match_operand:V2DF 1 "register_operand" "v"))]
2875 if (BYTES_BIG_ENDIAN)
2876 /* Big endian word numbering for words in operand is 0 1 2 3.
2877 Result words 0 and 2 are where they need to be.  */
2878 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2883 rtx rtx_val = GEN_INT (12);
2884 rtx_tmp = gen_reg_rtx (V4SImode);
2886 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2888 /* Little endian word numbering for operand is 3 2 1 0.
2889 take (operand[1] operand[1]) and shift left three words
2890 0 1 2 3 0 1 2 3 => 3 0 1 2
2891 Words 0 and 2 are now where they need to be for the result. */
2892 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2897 [(set_attr "type" "veccomplex")])
2899 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2900 ;; the xvrdpiz instruction does not truncate the value if the floating
2901 ;; point value is < LONG_MIN or > LONG_MAX.
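;; A hedged source-level sketch of the (float (fix x)) shape this pattern
;; matches (GCC generic vector types, not part of this file); the collapse to
;; xvrdpiz is only valid under -ffast-math for the reason given above:
;;
;;   typedef double    v2df __attribute__ ((vector_size (16)));
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;
;;   v2df trunc_via_int (v2df x)
;;   {
;;     v2di t = __builtin_convertvector (x, v2di);   /* fix    */
;;     return __builtin_convertvector (t, v2df);     /* float  */
;;   }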
2902 (define_insn "*vsx_float_fix_v2df2"
2903 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2906 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2908 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2909 && !flag_trapping_math && TARGET_FRIZ"
2911 [(set_attr "type" "vecdouble")])
2914 ;; Permute operations
2916 ;; Build a V2DF/V2DI vector from two scalars
2917 (define_insn "vsx_concat_<mode>"
2918 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2920 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2921 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2922 "VECTOR_MEM_VSX_P (<MODE>mode)"
2924 if (which_alternative == 0)
2925 return (BYTES_BIG_ENDIAN
2926 ? "xxpermdi %x0,%x1,%x2,0"
2927 : "xxpermdi %x0,%x2,%x1,0");
2929 else if (which_alternative == 1)
2930 return (BYTES_BIG_ENDIAN
2931 ? "mtvsrdd %x0,%1,%2"
2932 : "mtvsrdd %x0,%2,%1");
2937 [(set_attr "type" "vecperm,vecmove")])
2939 ;; Combiner patterns to allow creating XXPERMDIs to access either doubleword
2940 ;; element in a vector register.
2941 (define_insn "*vsx_concat_<mode>_1"
2942 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2944 (vec_select:<VS_scalar>
2945 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2946 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2947 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2948 "VECTOR_MEM_VSX_P (<MODE>mode)"
2950 HOST_WIDE_INT dword = INTVAL (operands[2]);
2951 if (BYTES_BIG_ENDIAN)
2953 operands[4] = GEN_INT (2*dword);
2954 return "xxpermdi %x0,%x1,%x3,%4";
2958 operands[4] = GEN_INT (!dword);
2959 return "xxpermdi %x0,%x3,%x1,%4";
2962 [(set_attr "type" "vecperm")])
2964 (define_insn "*vsx_concat_<mode>_2"
2965 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2967 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2968 (vec_select:<VS_scalar>
2969 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2970 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2971 "VECTOR_MEM_VSX_P (<MODE>mode)"
2973 HOST_WIDE_INT dword = INTVAL (operands[3]);
2974 if (BYTES_BIG_ENDIAN)
2976 operands[4] = GEN_INT (dword);
2977 return "xxpermdi %x0,%x1,%x2,%4";
2981 operands[4] = GEN_INT (2 * !dword);
2982 return "xxpermdi %x0,%x2,%x1,%4";
2985 [(set_attr "type" "vecperm")])
2987 (define_insn "*vsx_concat_<mode>_3"
2988 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2990 (vec_select:<VS_scalar>
2991 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2992 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2993 (vec_select:<VS_scalar>
2994 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2995 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2996 "VECTOR_MEM_VSX_P (<MODE>mode)"
2998 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2999 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
3000 if (BYTES_BIG_ENDIAN)
3002 operands[5] = GEN_INT ((2 * dword1) + dword2);
3003 return "xxpermdi %x0,%x1,%x3,%5";
3007 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
3008 return "xxpermdi %x0,%x3,%x1,%5";
3011 [(set_attr "type" "vecperm")])
3013 ;; Special purpose concat using xxpermdi to glue two single precision values
3014 ;; together, relying on the fact that internally scalar floats are represented
3015 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
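;; A hedged source-level sketch of the initializer this concat helps expand
;; (GCC generic vector type, not part of this file):
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf build (float a, float b, float c, float d)
;;   {
;;     /* Roughly two vsx_concat_v2sf operations plus a final merge.  */
;;     return (v4sf) { a, b, c, d };
;;   }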
3016 (define_insn "vsx_concat_v2sf"
3017 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
3019 [(match_operand:SF 1 "vsx_register_operand" "wa")
3020 (match_operand:SF 2 "vsx_register_operand" "wa")]
3021 UNSPEC_VSX_CONCAT))]
3022 "VECTOR_MEM_VSX_P (V2DFmode)"
3024 if (BYTES_BIG_ENDIAN)
3025 return "xxpermdi %x0,%x1,%x2,0";
3027 return "xxpermdi %x0,%x2,%x1,0";
3029 [(set_attr "type" "vecperm")])
3031 ;; Concatenate 4 SImode elements into a V4SImode reg.
3032 (define_expand "vsx_init_v4si"
3033 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
3034 (use (match_operand:SI 1 "gpc_reg_operand"))
3035 (use (match_operand:SI 2 "gpc_reg_operand"))
3036 (use (match_operand:SI 3 "gpc_reg_operand"))
3037 (use (match_operand:SI 4 "gpc_reg_operand"))]
3038 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3040 rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
3041 rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
3042 rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
3043 rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
3044 if (!BYTES_BIG_ENDIAN)
3050 rtx ab = gen_reg_rtx (DImode);
3051 rtx cd = gen_reg_rtx (DImode);
3052 emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
3053 GEN_INT (0xffffffff)));
3054 emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
3055 GEN_INT (0xffffffff)));
3057 rtx abcd = gen_reg_rtx (V2DImode);
3058 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
3059 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
3063 ;; xxpermdi for little endian loads and stores. We need several of
3064 ;; these since the form of the PARALLEL differs by mode.
3065 (define_insn "*vsx_xxpermdi2_le_<mode>"
3066 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3068 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3069 (parallel [(const_int 1) (const_int 0)])))]
3070 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3071 "xxpermdi %x0,%x1,%x1,2"
3072 [(set_attr "type" "vecperm")])
3074 (define_insn "xxswapd_v16qi"
3075 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3077 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3078 (parallel [(const_int 8) (const_int 9)
3079 (const_int 10) (const_int 11)
3080 (const_int 12) (const_int 13)
3081 (const_int 14) (const_int 15)
3082 (const_int 0) (const_int 1)
3083 (const_int 2) (const_int 3)
3084 (const_int 4) (const_int 5)
3085 (const_int 6) (const_int 7)])))]
3087 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3088 ;; mnemonic xxpermdi instead.
3089 "xxpermdi %x0,%x1,%x1,2"
3090 [(set_attr "type" "vecperm")])
3092 (define_insn "xxswapd_v8hi"
3093 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3095 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3096 (parallel [(const_int 4) (const_int 5)
3097 (const_int 6) (const_int 7)
3098 (const_int 0) (const_int 1)
3099 (const_int 2) (const_int 3)])))]
3101 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3102 ;; mnemonic xxpermdi instead.
3103 "xxpermdi %x0,%x1,%x1,2"
3104 [(set_attr "type" "vecperm")])
3106 (define_insn "xxswapd_<mode>"
3107 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3109 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3110 (parallel [(const_int 2) (const_int 3)
3111 (const_int 0) (const_int 1)])))]
3113 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3114 ;; mnemonic xxpermdi instead.
3115 "xxpermdi %x0,%x1,%x1,2"
3116 [(set_attr "type" "vecperm")])
3118 (define_insn "xxswapd_<mode>"
3119 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3121 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3122 (parallel [(const_int 1) (const_int 0)])))]
3124 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3125 ;; mnemonic xxpermdi instead.
3126 "xxpermdi %x0,%x1,%x1,2"
3127 [(set_attr "type" "vecperm")])
3129 (define_insn "xxgenpcvm_<mode>_internal"
3130 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3131 (unspec:VSX_EXTRACT_I4
3132 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3133 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3136 "xxgenpcv<wd>m %x0,%1,%2"
3137 [(set_attr "type" "vecsimple")])
3139 (define_expand "xxgenpcvm_<mode>"
3140 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3141 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3142 (use (match_operand:QI 2 "immediate_operand"))]
3145 if (!BYTES_BIG_ENDIAN)
3147 /* gen_xxgenpcvm assumes Big Endian order.  If LE,
3148 swap the upper and lower doublewords first.  */
3149 rtx tmp = gen_reg_rtx (<MODE>mode);
3151 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3154 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3159 ;; lxvd2x for little endian loads. We need several of
3160 ;; these since the form of the PARALLEL differs by mode.
3161 (define_insn "*vsx_lxvd2x2_le_<mode>"
3162 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3164 (match_operand:VSX_D 1 "memory_operand" "Z")
3165 (parallel [(const_int 1) (const_int 0)])))]
3166 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3168 [(set_attr "type" "vecload")])
3170 (define_insn "*vsx_lxvd2x4_le_<mode>"
3171 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3173 (match_operand:VSX_W 1 "memory_operand" "Z")
3174 (parallel [(const_int 2) (const_int 3)
3175 (const_int 0) (const_int 1)])))]
3176 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3178 [(set_attr "type" "vecload")])
3180 (define_insn "*vsx_lxvd2x8_le_V8HI"
3181 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3183 (match_operand:V8HI 1 "memory_operand" "Z")
3184 (parallel [(const_int 4) (const_int 5)
3185 (const_int 6) (const_int 7)
3186 (const_int 0) (const_int 1)
3187 (const_int 2) (const_int 3)])))]
3188 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3190 [(set_attr "type" "vecload")])
3192 (define_insn "*vsx_lxvd2x16_le_V16QI"
3193 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3195 (match_operand:V16QI 1 "memory_operand" "Z")
3196 (parallel [(const_int 8) (const_int 9)
3197 (const_int 10) (const_int 11)
3198 (const_int 12) (const_int 13)
3199 (const_int 14) (const_int 15)
3200 (const_int 0) (const_int 1)
3201 (const_int 2) (const_int 3)
3202 (const_int 4) (const_int 5)
3203 (const_int 6) (const_int 7)])))]
3204 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3206 [(set_attr "type" "vecload")])
3208 ;; stxvd2x for little endian stores. We need several of
3209 ;; these since the form of the PARALLEL differs by mode.
3210 (define_insn "*vsx_stxvd2x2_le_<mode>"
3211 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3213 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3214 (parallel [(const_int 1) (const_int 0)])))]
3215 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3217 [(set_attr "type" "vecstore")])
3219 (define_insn "*vsx_stxvd2x4_le_<mode>"
3220 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3222 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3223 (parallel [(const_int 2) (const_int 3)
3224 (const_int 0) (const_int 1)])))]
3225 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3227 [(set_attr "type" "vecstore")])
3229 (define_insn "*vsx_stxvd2x8_le_V8HI"
3230 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3232 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3233 (parallel [(const_int 4) (const_int 5)
3234 (const_int 6) (const_int 7)
3235 (const_int 0) (const_int 1)
3236 (const_int 2) (const_int 3)])))]
3237 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3239 [(set_attr "type" "vecstore")])
3241 (define_insn "*vsx_stxvd2x16_le_V16QI"
3242 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3244 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3245 (parallel [(const_int 8) (const_int 9)
3246 (const_int 10) (const_int 11)
3247 (const_int 12) (const_int 13)
3248 (const_int 14) (const_int 15)
3249 (const_int 0) (const_int 1)
3250 (const_int 2) (const_int 3)
3251 (const_int 4) (const_int 5)
3252 (const_int 6) (const_int 7)])))]
3253 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3255 [(set_attr "type" "vecstore")])
3257 ;; Convert a TImode value into V1TImode
3258 (define_expand "vsx_set_v1ti"
3259 [(match_operand:V1TI 0 "nonimmediate_operand")
3260 (match_operand:V1TI 1 "nonimmediate_operand")
3261 (match_operand:TI 2 "input_operand")
3262 (match_operand:QI 3 "u5bit_cint_operand")]
3263 "VECTOR_MEM_VSX_P (V1TImode)"
3265 if (operands[3] != const0_rtx)
3268 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3272 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3273 (define_expand "vsx_set_<mode>"
3274 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3275 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3276 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3277 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3278 "VECTOR_MEM_VSX_P (<MODE>mode)"
3280 rtx dest = operands[0];
3281 rtx vec_reg = operands[1];
3282 rtx value = operands[2];
3283 rtx ele = operands[3];
3284 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3286 if (ele == const0_rtx)
3288 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3289 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3292 else if (ele == const1_rtx)
3294 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3295 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3302 ;; Extract a DF/DI element from V2DF/V2DI
3303 ;; Optimize cases where we can do a simple or direct move,
3304 ;; or see if we can avoid doing the move at all.
3306 ;; There are some unresolved problems with reload that show up if an Altivec
3307 ;; register was picked. Limit the scalar value to FPRs for now.
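;; A hedged usage sketch (assumes <altivec.h> on a VSX target):
;;
;;   #include <altivec.h>
;;   double first_element (vector double v)
;;   {
;;     return vec_extract (v, 0);   /* ideally no move at all */
;;   }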
3309 (define_insn "vsx_extract_<mode>"
3310 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3311 (vec_select:<VS_scalar>
3312 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3314 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3315 "VECTOR_MEM_VSX_P (<MODE>mode)"
3317 int element = INTVAL (operands[2]);
3318 int op0_regno = REGNO (operands[0]);
3319 int op1_regno = REGNO (operands[1]);
3322 gcc_assert (IN_RANGE (element, 0, 1));
3323 gcc_assert (VSX_REGNO_P (op1_regno));
3325 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3327 if (op0_regno == op1_regno)
3328 return ASM_COMMENT_START " vec_extract to same register";
3330 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3331 && TARGET_POWERPC64)
3332 return "mfvsrd %0,%x1";
3334 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3337 else if (VSX_REGNO_P (op0_regno))
3338 return "xxlor %x0,%x1,%x1";
3344 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3345 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3346 return "mfvsrld %0,%x1";
3348 else if (VSX_REGNO_P (op0_regno))
3350 fldDM = element << 1;
3351 if (!BYTES_BIG_ENDIAN)
3353 operands[3] = GEN_INT (fldDM);
3354 return "xxpermdi %x0,%x1,%x1,%3";
3360 [(set_attr "type" "veclogical,mfvsr,mfvsr,vecperm")
3361 (set_attr "isa" "*,*,p8v,p9v")])
3363 ;; Optimize extracting a single scalar element from memory.
3364 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3365 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3366 (vec_select:<VSX_D:VS_scalar>
3367 (match_operand:VSX_D 1 "memory_operand" "m,m")
3368 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3369 (clobber (match_scratch:P 3 "=&b,&b"))]
3370 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3372 "&& reload_completed"
3373 [(set (match_dup 0) (match_dup 4))]
3375 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3376 operands[3], <VSX_D:VS_scalar>mode);
3378 [(set_attr "type" "fpload,load")
3379 (set_attr "length" "8")])
3381 ;; Optimize storing a single scalar element that is already in the right position to store.
3383 (define_insn "*vsx_extract_<mode>_store"
3384 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3385 (vec_select:<VS_scalar>
3386 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3387 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3388 "VECTOR_MEM_VSX_P (<MODE>mode)"
3393 [(set_attr "type" "fpstore")
3394 (set_attr "isa" "*,p7v,p9v")])
3396 ;; Variable V2DI/V2DF extract shift
3397 (define_insn "vsx_vslo_<mode>"
3398 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3399 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3400 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3402 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3404 [(set_attr "type" "vecperm")])
3406 ;; Variable V2DI/V2DF extract from a register
3407 (define_insn_and_split "vsx_extract_<mode>_var"
3408 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3409 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3410 (match_operand:DI 2 "gpc_reg_operand" "r")]
3411 UNSPEC_VSX_EXTRACT))
3412 (clobber (match_scratch:DI 3 "=r"))
3413 (clobber (match_scratch:V2DI 4 "=&v"))]
3414 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3416 "&& reload_completed"
3419 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3420 operands[3], operands[4]);
3424 ;; Variable V2DI/V2DF extract from memory
3425 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3426 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3427 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3428 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3429 UNSPEC_VSX_EXTRACT))
3430 (clobber (match_scratch:DI 3 "=&b,&b"))]
3431 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3433 "&& reload_completed"
3434 [(set (match_dup 0) (match_dup 4))]
3436 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3437 operands[3], <VS_scalar>mode);
3439 [(set_attr "type" "fpload,load")])
3441 ;; Extract a SF element from V4SF
3442 (define_insn_and_split "vsx_extract_v4sf"
3443 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3445 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3446 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3447 (clobber (match_scratch:V4SF 3 "=0"))]
3448 "VECTOR_UNIT_VSX_P (V4SFmode)"
3453 rtx op0 = operands[0];
3454 rtx op1 = operands[1];
3455 rtx op2 = operands[2];
3456 rtx op3 = operands[3];
3458 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3464 if (GET_CODE (op3) == SCRATCH)
3465 op3 = gen_reg_rtx (V4SFmode);
3466 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3469 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3472 [(set_attr "length" "8")
3473 (set_attr "type" "fp")])
3475 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3476 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3478 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3479 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3480 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3481 "VECTOR_MEM_VSX_P (V4SFmode)"
3483 "&& reload_completed"
3484 [(set (match_dup 0) (match_dup 4))]
3486 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3487 operands[3], SFmode);
3489 [(set_attr "type" "fpload,fpload,fpload,load")
3490 (set_attr "length" "8")
3491 (set_attr "isa" "*,p7v,p9v,*")])
3493 ;; Variable V4SF extract from a register
3494 (define_insn_and_split "vsx_extract_v4sf_var"
3495 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3496 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3497 (match_operand:DI 2 "gpc_reg_operand" "r")]
3498 UNSPEC_VSX_EXTRACT))
3499 (clobber (match_scratch:DI 3 "=r"))
3500 (clobber (match_scratch:V2DI 4 "=&v"))]
3501 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3503 "&& reload_completed"
3506 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3507 operands[3], operands[4]);
3511 ;; Variable V4SF extract from memory
3512 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3513 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3514 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3515 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3516 UNSPEC_VSX_EXTRACT))
3517 (clobber (match_scratch:DI 3 "=&b,&b"))]
3518 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3520 "&& reload_completed"
3521 [(set (match_dup 0) (match_dup 4))]
3523 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3524 operands[3], SFmode);
3526 [(set_attr "type" "fpload,load")])
3528 ;; Expand the builtin form of xxpermdi to canonical rtl.
3529 (define_expand "vsx_xxpermdi_<mode>"
3530 [(match_operand:VSX_L 0 "vsx_register_operand")
3531 (match_operand:VSX_L 1 "vsx_register_operand")
3532 (match_operand:VSX_L 2 "vsx_register_operand")
3533 (match_operand:QI 3 "u5bit_cint_operand")]
3534 "VECTOR_MEM_VSX_P (<MODE>mode)"
3536 rtx target = operands[0];
3537 rtx op0 = operands[1];
3538 rtx op1 = operands[2];
3539 int mask = INTVAL (operands[3]);
3540 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3541 rtx perm1 = GEN_INT ((mask & 1) + 2);
3542 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3544 if (<MODE>mode == V2DFmode)
3545 gen = gen_vsx_xxpermdi2_v2df_1;
3548 gen = gen_vsx_xxpermdi2_v2di_1;
3549 if (<MODE>mode != V2DImode)
3551 target = gen_lowpart (V2DImode, target);
3552 op0 = gen_lowpart (V2DImode, op0);
3553 op1 = gen_lowpart (V2DImode, op1);
3556 emit_insn (gen (target, op0, op1, perm0, perm1));
3560 ;; Special version of xxpermdi that retains big-endian semantics.
3561 (define_expand "vsx_xxpermdi_<mode>_be"
3562 [(match_operand:VSX_L 0 "vsx_register_operand")
3563 (match_operand:VSX_L 1 "vsx_register_operand")
3564 (match_operand:VSX_L 2 "vsx_register_operand")
3565 (match_operand:QI 3 "u5bit_cint_operand")]
3566 "VECTOR_MEM_VSX_P (<MODE>mode)"
3568 rtx target = operands[0];
3569 rtx op0 = operands[1];
3570 rtx op1 = operands[2];
3571 int mask = INTVAL (operands[3]);
3572 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3573 rtx perm1 = GEN_INT ((mask & 1) + 2);
3574 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3576 if (<MODE>mode == V2DFmode)
3577 gen = gen_vsx_xxpermdi2_v2df_1;
3580 gen = gen_vsx_xxpermdi2_v2di_1;
3581 if (<MODE>mode != V2DImode)
3583 target = gen_lowpart (V2DImode, target);
3584 op0 = gen_lowpart (V2DImode, op0);
3585 op1 = gen_lowpart (V2DImode, op1);
3588 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3589 transformation we don't want; it is necessary for
3590 rs6000_expand_vec_perm_const_1 but not for this use. So we
3591 prepare for that by reversing the transformation here. */
3592 if (BYTES_BIG_ENDIAN)
3593 emit_insn (gen (target, op0, op1, perm0, perm1));
3596 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3597 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3598 emit_insn (gen (target, op1, op0, p0, p1));
3603 (define_insn "vsx_xxpermdi2_<mode>_1"
3604 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3606 (vec_concat:<VS_double>
3607 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3608 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3609 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3610 (match_operand 4 "const_2_to_3_operand" "")])))]
3611 "VECTOR_MEM_VSX_P (<MODE>mode)"
3615 /* For little endian, swap operands and invert/swap selectors
3616 to get the correct xxpermdi. The operand swap sets up the
3617 inputs as a little endian array. The selectors are swapped
3618 because they are defined to use big endian ordering. The
3619 selectors are inverted to get the correct doublewords for
3620 little endian ordering. */
3621 if (BYTES_BIG_ENDIAN)
3623 op3 = INTVAL (operands[3]);
3624 op4 = INTVAL (operands[4]);
3628 op3 = 3 - INTVAL (operands[4]);
3629 op4 = 3 - INTVAL (operands[3]);
3632 mask = (op3 << 1) | (op4 - 2);
3633 operands[3] = GEN_INT (mask);
3635 if (BYTES_BIG_ENDIAN)
3636 return "xxpermdi %x0,%x1,%x2,%3";
3638 return "xxpermdi %x0,%x2,%x1,%3";
3640 [(set_attr "type" "vecperm")])
3642 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3643 ;; none of the small types were allowed in a vector register, so we had to
3644 ;; extract to a DImode and either do a direct move or store.
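;; A hedged usage sketch (assumes <altivec.h>); with a constant index and ISA
;; 3.0 this can use vextractub directly instead of the DImode path:
;;
;;   #include <altivec.h>
;;   unsigned char seventh_byte (vector unsigned char v)
;;   {
;;     return vec_extract (v, 7);
;;   }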
3645 (define_expand "vsx_extract_<mode>"
3646 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3647 (vec_select:<VS_scalar>
3648 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3649 (parallel [(match_operand:QI 2 "const_int_operand")])))
3650 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3651 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3653 /* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3654 if (TARGET_P9_VECTOR)
3656 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3662 (define_insn "vsx_extract_<mode>_p9"
3663 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3664 (vec_select:<VS_scalar>
3665 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3666 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3667 (clobber (match_scratch:SI 3 "=r,X"))]
3668 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3670 if (which_alternative == 0)
3675 HOST_WIDE_INT elt = INTVAL (operands[2]);
3676 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3677 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3680 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3681 HOST_WIDE_INT offset = unit_size * elt_adj;
3683 operands[2] = GEN_INT (offset);
3685 return "xxextractuw %x0,%x1,%2";
3687 return "vextractu<wd> %0,%1,%2";
3690 [(set_attr "type" "vecsimple")
3691 (set_attr "isa" "p9v,*")])
3694 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3695 (vec_select:<VS_scalar>
3696 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3697 (parallel [(match_operand:QI 2 "const_int_operand")])))
3698 (clobber (match_operand:SI 3 "int_reg_operand"))]
3699 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3702 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3703 rtx op1 = operands[1];
3704 rtx op2 = operands[2];
3705 rtx op3 = operands[3];
3706 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3708 emit_move_insn (op3, GEN_INT (offset));
3709 if (BYTES_BIG_ENDIAN)
3710 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3712 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3716 ;; Optimize zero extracts to eliminate the AND after the extract.
3717 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3718 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3720 (vec_select:<VS_scalar>
3721 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3722 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3723 (clobber (match_scratch:SI 3 "=r,X"))]
3724 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3726 "&& reload_completed"
3727 [(parallel [(set (match_dup 4)
3728 (vec_select:<VS_scalar>
3730 (parallel [(match_dup 2)])))
3731 (clobber (match_dup 3))])]
3733 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3735 [(set_attr "isa" "p9v,*")])
3737 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3738 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3739 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3740 (vec_select:<VS_scalar>
3741 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3742 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3743 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&*r"))
3744 (clobber (match_scratch:SI 4 "=X,&r"))]
3745 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3747 "&& reload_completed"
3748 [(parallel [(set (match_dup 3)
3749 (vec_select:<VS_scalar>
3751 (parallel [(match_dup 2)])))
3752 (clobber (match_dup 4))])
3756 (define_insn_and_split "*vsx_extract_si"
3757 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3759 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3760 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3761 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3762 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3764 "&& reload_completed"
3767 rtx dest = operands[0];
3768 rtx src = operands[1];
3769 rtx element = operands[2];
3770 rtx vec_tmp = operands[3];
3773 if (!BYTES_BIG_ENDIAN)
3774 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3776 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3778 value = INTVAL (element);
3780 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3784 if (MEM_P (operands[0]))
3786 if (can_create_pseudo_p ())
3787 dest = rs6000_force_indexed_or_indirect_mem (dest);
3789 if (TARGET_P8_VECTOR)
3790 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3792 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3795 else if (TARGET_P8_VECTOR)
3796 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3798 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3799 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3803 [(set_attr "type" "mfvsr,vecperm,fpstore")
3804 (set_attr "length" "8")
3805 (set_attr "isa" "*,p8v,*")])
3807 (define_insn_and_split "*vsx_extract_<mode>_p8"
3808 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3809 (vec_select:<VS_scalar>
3810 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3811 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3812 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3813 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3814 && !TARGET_P9_VECTOR"
3816 "&& reload_completed"
3819 rtx dest = operands[0];
3820 rtx src = operands[1];
3821 rtx element = operands[2];
3822 rtx vec_tmp = operands[3];
3825 if (!BYTES_BIG_ENDIAN)
3826 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3828 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3830 value = INTVAL (element);
3831 if (<MODE>mode == V16QImode)
3834 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3838 else if (<MODE>mode == V8HImode)
3841 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3848 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3849 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3852 [(set_attr "type" "mfvsr")])
3854 ;; Optimize extracting a single scalar element from memory.
3855 (define_insn_and_split "*vsx_extract_<mode>_load"
3856 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3857 (vec_select:<VS_scalar>
3858 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3859 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3860 (clobber (match_scratch:DI 3 "=&b"))]
3861 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3863 "&& reload_completed"
3864 [(set (match_dup 0) (match_dup 4))]
3866 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3867 operands[3], <VS_scalar>mode);
3869 [(set_attr "type" "load")
3870 (set_attr "length" "8")])
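;; As a rough illustration (assumed usage): when the vector operand lives in
;; memory, the pattern above folds the extract into a single scalar load instead
;; of loading the whole vector into a register first.
;;
;;   #include <altivec.h>
;;
;;   short
;;   extract_from_mem (vector short *p)
;;   {
;;     return vec_extract (*p, 2);
;;   }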
3872 ;; Variable V16QI/V8HI/V4SI extract from a register
3873 (define_insn_and_split "vsx_extract_<mode>_var"
3874 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3876 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3877 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3878 UNSPEC_VSX_EXTRACT))
3879 (clobber (match_scratch:DI 3 "=r,r"))
3880 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3881 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3883 "&& reload_completed"
3886 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3887 operands[3], operands[4]);
3890 [(set_attr "isa" "p9v,*")])
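;; As a rough illustration (assumed usage): the variable-index form above is
;; reached when the element number is only known at run time.
;;
;;   #include <altivec.h>
;;
;;   unsigned char
;;   extract_var (vector unsigned char v, unsigned long i)
;;   {
;;     return vec_extract (v, i);   /* i is not a compile-time constant */
;;   }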
3892 ;; Variable V16QI/V8HI/V4SI extract from memory
3893 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3894 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3896 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3897 (match_operand:DI 2 "gpc_reg_operand" "r")]
3898 UNSPEC_VSX_EXTRACT))
3899 (clobber (match_scratch:DI 3 "=&b"))]
3900 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3902 "&& reload_completed"
3903 [(set (match_dup 0) (match_dup 4))]
3905 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3906 operands[3], <VS_scalar>mode);
3908 [(set_attr "type" "load")])
3911 (define_expand "vextractl<mode>"
3912 [(set (match_operand:V2DI 0 "altivec_register_operand")
3913 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3914 (match_operand:VI2 2 "altivec_register_operand")
3915 (match_operand:SI 3 "register_operand")]
3919 if (BYTES_BIG_ENDIAN)
3921 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
3922 operands[2], operands[3]));
3923 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3926 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
3927 operands[1], operands[3]));
3931 (define_insn "vextractl<mode>_internal"
3932 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3933 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3934 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3935 (match_operand:SI 3 "register_operand" "r")]
3938 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
3939 [(set_attr "type" "vecsimple")])
3941 (define_expand "vextractr<mode>"
3942 [(set (match_operand:V2DI 0 "altivec_register_operand")
3943 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3944 (match_operand:VI2 2 "altivec_register_operand")
3945 (match_operand:SI 3 "register_operand")]
3949 if (BYTES_BIG_ENDIAN)
3951 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
3952 operands[2], operands[3]));
3953 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3956 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
3957 operands[1], operands[3]));
3961 (define_insn "vextractr<mode>_internal"
3962 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3963 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3964 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3965 (match_operand:SI 3 "register_operand" "r")]
3968 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
3969 [(set_attr "type" "vecsimple")])
3971 (define_expand "vinsertvl_<mode>"
3972 [(set (match_operand:VI2 0 "altivec_register_operand")
3973 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
3974 (match_operand:VI2 2 "altivec_register_operand")
3975 (match_operand:SI 3 "register_operand" "r")]
3979 if (BYTES_BIG_ENDIAN)
3980 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
3981 operands[1], operands[2]));
3983 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
3984 operands[1], operands[2]));
3988 (define_insn "vinsertvl_internal_<mode>"
3989 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
3990 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
3991 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3992 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
3995 "vins<wd>vlx %0,%1,%2"
3996 [(set_attr "type" "vecsimple")])
3998 (define_expand "vinsertvr_<mode>"
3999 [(set (match_operand:VI2 0 "altivec_register_operand")
4000 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4001 (match_operand:VI2 2 "altivec_register_operand")
4002 (match_operand:SI 3 "register_operand" "r")]
4006 if (BYTES_BIG_ENDIAN)
4007 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4008 operands[1], operands[2]));
4010 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4011 operands[1], operands[2]));
4015 (define_insn "vinsertvr_internal_<mode>"
4016 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4017 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4018 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4019 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4022 "vins<wd>vrx %0,%1,%2"
4023 [(set_attr "type" "vecsimple")])
4025 (define_expand "vinsertgl_<mode>"
4026 [(set (match_operand:VI2 0 "altivec_register_operand")
4027 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4028 (match_operand:VI2 2 "altivec_register_operand")
4029 (match_operand:SI 3 "register_operand")]
4033 if (BYTES_BIG_ENDIAN)
4034 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4035 operands[1], operands[2]));
4037 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4038 operands[1], operands[2]));
4042 (define_insn "vinsertgl_internal_<mode>"
4043 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4044 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4045 (match_operand:SI 2 "register_operand" "r")
4046 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4049 "vins<wd>lx %0,%1,%2"
4050 [(set_attr "type" "vecsimple")])
4052 (define_expand "vinsertgr_<mode>"
4053 [(set (match_operand:VI2 0 "altivec_register_operand")
4054 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4055 (match_operand:VI2 2 "altivec_register_operand")
4056 (match_operand:SI 3 "register_operand")]
4060 if (BYTES_BIG_ENDIAN)
4061 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4062 operands[1], operands[2]));
4064 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4065 operands[1], operands[2]));
4069 (define_insn "vinsertgr_internal_<mode>"
4070 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4071 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4072 (match_operand:SI 2 "register_operand" "r")
4073 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4076 "vins<wd>rx %0,%1,%2"
4077 [(set_attr "type" "vecsimple")])
4079 (define_expand "vreplace_elt_<mode>"
4080 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4081 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4082 (match_operand:<VS_scalar> 2 "register_operand")
4083 (match_operand:QI 3 "const_0_to_3_operand")]
4084 UNSPEC_REPLACE_ELT))]
4088 /* The immediate value is the word index; convert it to a byte index and
4089 adjust for endianness if needed.  */
4090 if (BYTES_BIG_ENDIAN)
4091 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4094 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4096 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4101 [(set_attr "type" "vecsimple")])
4103 (define_expand "vreplace_un_<mode>"
4104 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4105 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4106 (match_operand:<VS_scalar> 2 "register_operand")
4107 (match_operand:QI 3 "const_0_to_12_operand")]
4108 UNSPEC_REPLACE_UN))]
4111 /* The immediate value is the byte index, using big-endian numbering.  */
4112 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4113 operands[2], operands[3]));
4116 [(set_attr "type" "vecsimple")])
4118 (define_insn "vreplace_elt_<mode>_inst"
4119 [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
4120 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4121 (match_operand:<VS_scalar> 2 "register_operand" "r")
4122 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4123 UNSPEC_REPLACE_ELT))]
4125 "vins<REPLACE_ELT_char> %0,%2,%3"
4126 [(set_attr "type" "vecsimple")])
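;; As a rough illustration (assumed mapping; ISA 3.1 only): the vreplace_elt
;; expanders above are expected to back the vec_replace_elt built-in.
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int
;;   replace_word (vector unsigned int v, unsigned int x)
;;   {
;;     return vec_replace_elt (v, x, 2);   /* replace word element 2 with x */
;;   }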
4128 ;; VSX_EXTRACT optimizations
4129 ;; Optimize double d = (double) vec_extract (vi, <n>)
4130 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
4131 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4132 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4135 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4136 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4137 (clobber (match_scratch:V4SI 3 "=v"))]
4138 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4143 rtx dest = operands[0];
4144 rtx src = operands[1];
4145 rtx element = operands[2];
4146 rtx v4si_tmp = operands[3];
4149 if (!BYTES_BIG_ENDIAN)
4150 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4152 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
4154 value = INTVAL (element);
4157 if (GET_CODE (v4si_tmp) == SCRATCH)
4158 v4si_tmp = gen_reg_rtx (V4SImode);
4159 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4164 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4168 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4169 ;; where <type> is a floating point type supported by the hardware that is
4170 ;; not double.  First convert the value to double, and then to the desired type.
4172 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4173 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4174 (any_float:VSX_EXTRACT_FL
4176 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4177 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4178 (clobber (match_scratch:V4SI 3 "=v"))
4179 (clobber (match_scratch:DF 4 "=wa"))]
4180 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4185 rtx dest = operands[0];
4186 rtx src = operands[1];
4187 rtx element = operands[2];
4188 rtx v4si_tmp = operands[3];
4189 rtx df_tmp = operands[4];
4192 if (!BYTES_BIG_ENDIAN)
4193 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4195 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
4197 value = INTVAL (element);
4200 if (GET_CODE (v4si_tmp) == SCRATCH)
4201 v4si_tmp = gen_reg_rtx (V4SImode);
4202 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4207 if (GET_CODE (df_tmp) == SCRATCH)
4208 df_tmp = gen_reg_rtx (DFmode);
4210 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4212 if (<MODE>mode == SFmode)
4213 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4214 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4215 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4216 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4217 && TARGET_FLOAT128_HW)
4218 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4219 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4220 emit_insn (gen_extenddfif2 (dest, df_tmp));
4221 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4222 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4229 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
4230 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4231 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4232 ;; vector short or vector unsigned short.
4233 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
4234 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4236 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4237 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4238 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4239 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4240 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4241 && TARGET_P9_VECTOR"
4243 "&& reload_completed"
4244 [(parallel [(set (match_dup 3)
4245 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4247 (parallel [(match_dup 2)])))
4248 (clobber (scratch:SI))])
4250 (sign_extend:DI (match_dup 3)))
4252 (float:<FL_CONV:MODE> (match_dup 4)))]
4254 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4256 [(set_attr "isa" "<FL_CONV:VSisa>")])
4258 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
4259 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4260 (unsigned_float:FL_CONV
4261 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4262 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4263 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4264 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4265 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4266 && TARGET_P9_VECTOR"
4268 "&& reload_completed"
4269 [(parallel [(set (match_dup 3)
4270 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4272 (parallel [(match_dup 2)])))
4273 (clobber (scratch:SI))])
4275 (float:<FL_CONV:MODE> (match_dup 4)))]
4277 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4279 [(set_attr "isa" "<FL_CONV:VSisa>")])
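;; As a rough illustration (assumed usage): the two patterns above cover
;; converting an extracted char/short element directly to a floating point
;; value without a separate move through the GPRs.
;;
;;   #include <altivec.h>
;;
;;   float
;;   extract_to_float (vector signed char v)
;;   {
;;     return (float) vec_extract (v, 3);
;;   }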
4281 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4282 (define_insn "vsx_set_<mode>_p9"
4283 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4284 (unspec:VSX_EXTRACT_I
4285 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4286 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
4287 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4289 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4291 int ele = INTVAL (operands[3]);
4292 int nunits = GET_MODE_NUNITS (<MODE>mode);
4294 if (!BYTES_BIG_ENDIAN)
4295 ele = nunits - 1 - ele;
4297 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
4298 if (<MODE>mode == V4SImode)
4299 return "xxinsertw %x0,%x2,%3";
4301 return "vinsert<wd> %0,%2,%3";
4303 [(set_attr "type" "vecperm")])
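;; As a rough illustration (assumed usage): the ISA 3.0 set pattern above
;; handles inserting a scalar element with a single xxinsertw/vinsert*.
;;
;;   #include <altivec.h>
;;
;;   vector int
;;   set_word (vector int v, int x)
;;   {
;;     return vec_insert (x, v, 3);   /* set element 3 to x */
;;   }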
4305 (define_insn_and_split "vsx_set_v4sf_p9"
4306 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4308 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4309 (match_operand:SF 2 "gpc_reg_operand" "wa")
4310 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4312 (clobber (match_scratch:SI 4 "=&wa"))]
4313 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4315 "&& reload_completed"
4317 (unspec:V4SF [(match_dup 2)]
4318 UNSPEC_VSX_CVDPSPN))
4319 (parallel [(set (match_dup 4)
4320 (vec_select:SI (match_dup 6)
4321 (parallel [(match_dup 7)])))
4322 (clobber (scratch:SI))])
4324 (unspec:V4SI [(match_dup 8)
4329 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4331 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4332 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4333 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4334 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4336 [(set_attr "type" "vecperm")
4337 (set_attr "length" "12")
4338 (set_attr "isa" "p9v")])
4340 ;; Special case setting 0.0f to a V4SF element
4341 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4342 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4344 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4345 (match_operand:SF 2 "zero_fp_constant" "j")
4346 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4348 (clobber (match_scratch:SI 4 "=&wa"))]
4349 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4351 "&& reload_completed"
4355 (unspec:V4SI [(match_dup 5)
4360 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4362 [(set_attr "type" "vecperm")
4363 (set_attr "length" "8")
4364 (set_attr "isa" "p9v")])
4366 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4367 ;; that is in the default scalar position (1 for big endian, 2 for little
4368 ;; endian). We just need to do an xxinsertw since the element is in the
4369 ;; correct location.
4371 (define_insn "*vsx_insert_extract_v4sf_p9"
4372 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4374 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4375 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4377 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4378 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4380 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4381 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4383 int ele = INTVAL (operands[4]);
4385 if (!BYTES_BIG_ENDIAN)
4386 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4388 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4389 return "xxinsertw %x0,%x2,%4";
4391 [(set_attr "type" "vecperm")])
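;; As a rough illustration (assumed usage): the insert-of-extract patterns
;; match code that copies one element of a vector into another vector; whether
;; the one-insn or two-insn variant fires depends on whether the extracted
;; element already sits in the default scalar position.
;;
;;   #include <altivec.h>
;;
;;   vector float
;;   copy_elem (vector float dst, vector float src)
;;   {
;;     return vec_insert (vec_extract (src, 2), dst, 0);
;;   }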
4393 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4394 ;; that is in the default scalar position (1 for big endian, 2 for little
4395 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4397 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4398 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4400 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4401 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4403 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4404 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4406 (clobber (match_scratch:SI 5 "=&wa"))]
4407 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4408 && TARGET_P9_VECTOR && TARGET_POWERPC64
4409 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4412 [(parallel [(set (match_dup 5)
4413 (vec_select:SI (match_dup 6)
4414 (parallel [(match_dup 3)])))
4415 (clobber (scratch:SI))])
4417 (unspec:V4SI [(match_dup 8)
4422 if (GET_CODE (operands[5]) == SCRATCH)
4423 operands[5] = gen_reg_rtx (SImode);
4425 operands[6] = gen_lowpart (V4SImode, operands[2]);
4426 operands[7] = gen_lowpart (V4SImode, operands[0]);
4427 operands[8] = gen_lowpart (V4SImode, operands[1]);
4429 [(set_attr "type" "vecperm")
4430 (set_attr "isa" "p9v")])
4432 ;; Expanders for builtins
4433 (define_expand "vsx_mergel_<mode>"
4434 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4435 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4436 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4437 "VECTOR_MEM_VSX_P (<MODE>mode)"
4439 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4440 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4441 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4442 emit_insn (gen_rtx_SET (operands[0], x));
4446 (define_expand "vsx_mergeh_<mode>"
4447 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4448 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4449 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4450 "VECTOR_MEM_VSX_P (<MODE>mode)"
4452 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4453 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4454 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4455 emit_insn (gen_rtx_SET (operands[0], x));
4460 ;; We separate the register splat insn from the memory splat insn to force the
4461 ;; register allocator to generate the indexed form of the SPLAT when it is
4462 ;; given an offsettable memory reference. Otherwise, if the register and
4463 ;; memory insns were combined into a single insn, the register allocator will
4464 ;; load the value into a register, and then do a double word permute.
4465 (define_expand "vsx_splat_<mode>"
4466 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4467 (vec_duplicate:VSX_D
4468 (match_operand:<VS_scalar> 1 "input_operand")))]
4469 "VECTOR_MEM_VSX_P (<MODE>mode)"
4471 rtx op1 = operands[1];
4473 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4474 else if (!REG_P (op1))
4475 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4478 (define_insn "vsx_splat_<mode>_reg"
4479 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4480 (vec_duplicate:VSX_D
4481 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4482 "VECTOR_MEM_VSX_P (<MODE>mode)"
4484 xxpermdi %x0,%x1,%x1,0
4486 [(set_attr "type" "vecperm,vecmove")])
4488 (define_insn "vsx_splat_<mode>_mem"
4489 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4490 (vec_duplicate:VSX_D
4491 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4492 "VECTOR_MEM_VSX_P (<MODE>mode)"
4494 [(set_attr "type" "vecload")])
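;; As a rough illustration (assumed usage): keeping the memory splat as its own
;; insn lets the register allocator pick the indexed load form directly for
;; code such as:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   splat_from_mem (double *p, long i)
;;   {
;;     return vec_splats (p[i]);   /* indexed load feeds the splat */
;;   }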
4496 ;; V4SI splat support
4497 (define_insn "vsx_splat_v4si"
4498 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4500 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4505 [(set_attr "type" "vecperm,vecload")])
4507 ;; SImode is not currently allowed in vector registers. This pattern
4508 ;; allows us to use direct move to get the value in a vector register
4509 ;; so that we can use XXSPLTW
4510 (define_insn "vsx_splat_v4si_di"
4511 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4514 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4515 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4519 [(set_attr "type" "vecperm")
4520 (set_attr "isa" "p8v,*")])
4522 ;; V4SF splat (ISA 3.0)
4523 (define_insn_and_split "vsx_splat_v4sf"
4524 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4526 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4532 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4534 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4536 (unspec:V4SF [(match_dup 0)
4537 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4539 [(set_attr "type" "vecload,vecperm,vecperm")
4540 (set_attr "length" "*,8,*")
4541 (set_attr "isa" "*,p8v,*")])
4543 ;; V4SF/V4SI splat from a vector element
4544 (define_insn "vsx_xxspltw_<mode>"
4545 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4546 (vec_duplicate:VSX_W
4547 (vec_select:<VS_scalar>
4548 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4550 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4551 "VECTOR_MEM_VSX_P (<MODE>mode)"
4553 if (!BYTES_BIG_ENDIAN)
4554 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4556 return "xxspltw %x0,%x1,%2";
4558 [(set_attr "type" "vecperm")])
4560 (define_insn "vsx_xxspltw_<mode>_direct"
4561 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4562 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4563 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4564 UNSPEC_VSX_XXSPLTW))]
4565 "VECTOR_MEM_VSX_P (<MODE>mode)"
4566 "xxspltw %x0,%x1,%2"
4567 [(set_attr "type" "vecperm")])
4569 ;; V16QI/V8HI splat support on ISA 2.07
4570 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4571 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4572 (vec_duplicate:VSX_SPLAT_I
4573 (truncate:<VS_scalar>
4574 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4575 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4576 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4577 [(set_attr "type" "vecperm")])
4579 ;; V2DF/V2DI splat for use by vec_splat builtin
4580 (define_insn "vsx_xxspltd_<mode>"
4581 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4582 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4583 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4584 UNSPEC_VSX_XXSPLTD))]
4585 "VECTOR_MEM_VSX_P (<MODE>mode)"
4587 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4588 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4589 return "xxpermdi %x0,%x1,%x1,0";
4591 return "xxpermdi %x0,%x1,%x1,3";
4593 [(set_attr "type" "vecperm")])
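;; As a rough illustration (assumed usage): the xxspltd pattern above serves
;; the vec_splat built-in on doubleword vectors.
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   splat_elem1 (vector double v)
;;   {
;;     return vec_splat (v, 1);   /* duplicate element 1 into both lanes */
;;   }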
4595 ;; V4SF/V4SI interleave
4596 (define_insn "vsx_xxmrghw_<mode>"
4597 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4599 (vec_concat:<VS_double>
4600 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4601 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4602 (parallel [(const_int 0) (const_int 4)
4603 (const_int 1) (const_int 5)])))]
4604 "VECTOR_MEM_VSX_P (<MODE>mode)"
4606 if (BYTES_BIG_ENDIAN)
4607 return "xxmrghw %x0,%x1,%x2";
4609 return "xxmrglw %x0,%x2,%x1";
4611 [(set_attr "type" "vecperm")])
4613 (define_insn "vsx_xxmrglw_<mode>"
4614 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4616 (vec_concat:<VS_double>
4617 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4618 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4619 (parallel [(const_int 2) (const_int 6)
4620 (const_int 3) (const_int 7)])))]
4621 "VECTOR_MEM_VSX_P (<MODE>mode)"
4623 if (BYTES_BIG_ENDIAN)
4624 return "xxmrglw %x0,%x1,%x2";
4626 return "xxmrghw %x0,%x2,%x1";
4628 [(set_attr "type" "vecperm")])
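;; As a rough illustration (assumed usage): the word interleave patterns above
;; implement the vec_mergeh/vec_mergel built-ins for the 4x32-bit vector modes.
;;
;;   #include <altivec.h>
;;
;;   vector int
;;   merge_high (vector int a, vector int b)
;;   {
;;     return vec_mergeh (a, b);   /* interleave the high halves of a and b */
;;   }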
4630 ;; Shift left double by word immediate
4631 (define_insn "vsx_xxsldwi_<mode>"
4632 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4633 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4634 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4635 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4637 "VECTOR_MEM_VSX_P (<MODE>mode)"
4638 "xxsldwi %x0,%x1,%x2,%3"
4639 [(set_attr "type" "vecperm")
4640 (set_attr "isa" "<VSisa>")])
4643 ;; Vector reduction insns and splitters
4645 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4646 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4650 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4651 (parallel [(const_int 1)]))
4654 (parallel [(const_int 0)])))
4656 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4657 "VECTOR_UNIT_VSX_P (V2DFmode)"
4662 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4663 ? gen_reg_rtx (V2DFmode)
4665 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4666 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4669 [(set_attr "length" "8")
4670 (set_attr "type" "veccomplex")])
4672 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4673 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4675 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4676 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4677 (clobber (match_scratch:V4SF 2 "=&wa"))
4678 (clobber (match_scratch:V4SF 3 "=&wa"))]
4679 "VECTOR_UNIT_VSX_P (V4SFmode)"
4684 rtx op0 = operands[0];
4685 rtx op1 = operands[1];
4686 rtx tmp2, tmp3, tmp4;
4688 if (can_create_pseudo_p ())
4690 tmp2 = gen_reg_rtx (V4SFmode);
4691 tmp3 = gen_reg_rtx (V4SFmode);
4692 tmp4 = gen_reg_rtx (V4SFmode);
4701 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4702 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4703 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4704 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4707 [(set_attr "length" "16")
4708 (set_attr "type" "veccomplex")])
4710 ;; Combiner patterns with the vector reduction patterns that know we can get
4711 ;; to the top element of the V2DF array without doing an extract.
4713 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4714 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4719 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4720 (parallel [(const_int 1)]))
4723 (parallel [(const_int 0)])))
4725 (parallel [(const_int 1)])))
4726 (clobber (match_scratch:DF 2 "=0,&wa"))]
4727 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4732 rtx hi = gen_highpart (DFmode, operands[1]);
4733 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4734 ? gen_reg_rtx (DFmode)
4737 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4738 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4741 [(set_attr "length" "8")
4742 (set_attr "type" "veccomplex")])
4744 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4745 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4748 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4749 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4750 (parallel [(const_int 3)])))
4751 (clobber (match_scratch:V4SF 2 "=&wa"))
4752 (clobber (match_scratch:V4SF 3 "=&wa"))
4753 (clobber (match_scratch:V4SF 4 "=0"))]
4754 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4759 rtx op0 = operands[0];
4760 rtx op1 = operands[1];
4761 rtx tmp2, tmp3, tmp4, tmp5;
4763 if (can_create_pseudo_p ())
4765 tmp2 = gen_reg_rtx (V4SFmode);
4766 tmp3 = gen_reg_rtx (V4SFmode);
4767 tmp4 = gen_reg_rtx (V4SFmode);
4768 tmp5 = gen_reg_rtx (V4SFmode);
4778 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4779 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4780 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4781 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4782 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4785 [(set_attr "length" "20")
4786 (set_attr "type" "veccomplex")])
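;; As a rough illustration (assumed usage; FP reassociation such as -ffast-math
;; is needed before the vectorizer forms such a reduction): the splitters above
;; reduce a V4SF vector with shift/op steps, as for a loop like:
;;
;;   float
;;   sum4 (const float *a)
;;   {
;;     float s = 0.0f;
;;     for (int i = 0; i < 4; i++)
;;       s += a[i];
;;     return s;
;;   }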
4789 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4791 [(set (match_operand:P 0 "base_reg_operand")
4792 (match_operand:P 1 "short_cint_operand"))
4793 (set (match_operand:VSX_M 2 "vsx_register_operand")
4794 (mem:VSX_M (plus:P (match_dup 0)
4795 (match_operand:P 3 "int_reg_operand"))))]
4796 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4797 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4798 [(set_attr "length" "8")
4799 (set_attr "type" "vecload")])
4802 [(set (match_operand:P 0 "base_reg_operand")
4803 (match_operand:P 1 "short_cint_operand"))
4804 (set (match_operand:VSX_M 2 "vsx_register_operand")
4805 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4807 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4808 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4809 [(set_attr "length" "8")
4810 (set_attr "type" "vecload")])
4813 ;; ISA 3.0 vector extend sign support
4815 (define_insn "vsx_sign_extend_qi_<mode>"
4816 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4818 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4819 UNSPEC_VSX_SIGN_EXTEND))]
4822 [(set_attr "type" "vecexts")])
4824 (define_insn "vsx_sign_extend_hi_<mode>"
4825 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4827 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4828 UNSPEC_VSX_SIGN_EXTEND))]
4831 [(set_attr "type" "vecexts")])
4833 (define_insn "*vsx_sign_extend_si_v2di"
4834 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4835 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4836 UNSPEC_VSX_SIGN_EXTEND))]
4839 [(set_attr "type" "vecexts")])
4841 ;; ISA 3.1 vector sign extend
4842 ;; Move DI value from GPR to TI mode in VSX register, word 1.
4843 (define_insn "mtvsrdd_diti_w1"
4844 [(set (match_operand:TI 0 "register_operand" "=wa")
4845 (unspec:TI [(match_operand:DI 1 "register_operand" "r")]
4846 UNSPEC_MTVSRD_DITI_W1))]
4847 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
4849 [(set_attr "type" "vecmove")])
4851 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
4852 (define_insn "extendditi2_vector"
4853 [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
4854 (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")]
4855 UNSPEC_EXTENDDITI2))]
4858 [(set_attr "type" "vecexts")])
4860 (define_expand "extendditi2"
4861 [(set (match_operand:TI 0 "gpc_reg_operand")
4862 (sign_extend:DI (match_operand:DI 1 "gpc_reg_operand")))]
4865 /* Move 64-bit src from GPR to vector reg and sign extend to 128-bits. */
4866 rtx temp = gen_reg_rtx (TImode);
4867 emit_insn (gen_mtvsrdd_diti_w1 (temp, operands[1]));
4868 emit_insn (gen_extendditi2_vector (operands[0], temp));
4873 ;; ISA 3.0 Binary Floating-Point Support
4875 ;; VSX Scalar Extract Exponent Quad-Precision
4876 (define_insn "xsxexpqp_<mode>"
4877 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4878 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4879 UNSPEC_VSX_SXEXPDP))]
4882 [(set_attr "type" "vecmove")])
4884 ;; VSX Scalar Extract Exponent Double-Precision
4885 (define_insn "xsxexpdp"
4886 [(set (match_operand:DI 0 "register_operand" "=r")
4887 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4888 UNSPEC_VSX_SXEXPDP))]
4889 "TARGET_P9_VECTOR && TARGET_64BIT"
4891 [(set_attr "type" "integer")])
4893 ;; VSX Scalar Extract Significand Quad-Precision
4894 (define_insn "xsxsigqp_<mode>"
4895 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4896 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4900 [(set_attr "type" "vecmove")])
4902 ;; VSX Scalar Extract Significand Double-Precision
4903 (define_insn "xsxsigdp"
4904 [(set (match_operand:DI 0 "register_operand" "=r")
4905 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4907 "TARGET_P9_VECTOR && TARGET_64BIT"
4909 [(set_attr "type" "integer")])
4911 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4912 (define_insn "xsiexpqpf_<mode>"
4913 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4915 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4916 (match_operand:DI 2 "altivec_register_operand" "v")]
4917 UNSPEC_VSX_SIEXPQP))]
4920 [(set_attr "type" "vecmove")])
4922 ;; VSX Scalar Insert Exponent Quad-Precision
4923 (define_insn "xsiexpqp_<mode>"
4924 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4925 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4926 (match_operand:DI 2 "altivec_register_operand" "v")]
4927 UNSPEC_VSX_SIEXPQP))]
4930 [(set_attr "type" "vecmove")])
4932 ;; VSX Scalar Insert Exponent Double-Precision
4933 (define_insn "xsiexpdp"
4934 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4935 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4936 (match_operand:DI 2 "register_operand" "r")]
4937 UNSPEC_VSX_SIEXPDP))]
4938 "TARGET_P9_VECTOR && TARGET_64BIT"
4939 "xsiexpdp %x0,%1,%2"
4940 [(set_attr "type" "fpsimple")])
4942 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4943 (define_insn "xsiexpdpf"
4944 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4945 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4946 (match_operand:DI 2 "register_operand" "r")]
4947 UNSPEC_VSX_SIEXPDP))]
4948 "TARGET_P9_VECTOR && TARGET_64BIT"
4949 "xsiexpdp %x0,%1,%2"
4950 [(set_attr "type" "fpsimple")])
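;; As a rough illustration (built-in name and return type assumed; 64-bit,
;; ISA 3.0): the scalar exponent/significand insns above are expected to back
;; the scalar_extract_exp/scalar_extract_sig/scalar_insert_exp built-ins.
;;
;;   #include <altivec.h>
;;
;;   unsigned int
;;   get_exp (double x)
;;   {
;;     return scalar_extract_exp (x);   /* assumed to map to xsxexpdp */
;;   }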
4952 ;; VSX Scalar Compare Exponents Double-Precision
4953 (define_expand "xscmpexpdp_<code>"
4957 [(match_operand:DF 1 "vsx_register_operand" "wa")
4958 (match_operand:DF 2 "vsx_register_operand" "wa")]
4959 UNSPEC_VSX_SCMPEXPDP)
4961 (set (match_operand:SI 0 "register_operand" "=r")
4962 (CMP_TEST:SI (match_dup 3)
4966 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4968 emit_move_insn (operands[0], const0_rtx);
4972 operands[3] = gen_reg_rtx (CCFPmode);
4975 (define_insn "*xscmpexpdp"
4976 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4978 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4979 (match_operand:DF 2 "vsx_register_operand" "wa")]
4980 UNSPEC_VSX_SCMPEXPDP)
4981 (match_operand:SI 3 "zero_constant" "j")))]
4983 "xscmpexpdp %0,%x1,%x2"
4984 [(set_attr "type" "fpcompare")])
4986 ;; VSX Scalar Compare Exponents Quad-Precision
4987 (define_expand "xscmpexpqp_<code>_<mode>"
4991 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4992 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4993 UNSPEC_VSX_SCMPEXPQP)
4995 (set (match_operand:SI 0 "register_operand" "=r")
4996 (CMP_TEST:SI (match_dup 3)
5000 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
5002 emit_move_insn (operands[0], const0_rtx);
5006 operands[3] = gen_reg_rtx (CCFPmode);
5009 (define_insn "*xscmpexpqp"
5010 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5012 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5013 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
5014 UNSPEC_VSX_SCMPEXPQP)
5015 (match_operand:SI 3 "zero_constant" "j")))]
5017 "xscmpexpqp %0,%1,%2"
5018 [(set_attr "type" "fpcompare")])
5020 ;; VSX Scalar Test Data Class Quad-Precision
5021 ;; (Expansion for scalar_test_data_class (__ieee128, int))
5022 ;; (Has side effect of setting the lt bit if operand 1 is negative,
5023 ;; setting the eq bit if any of the conditions tested by operand 2
5024 ;; are satisfied, and clearing the gt and unordered bits to zero.)
5025 (define_expand "xststdcqp_<mode>"
5029 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5030 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5033 (set (match_operand:SI 0 "register_operand" "=r")
5034 (eq:SI (match_dup 3)
5038 operands[3] = gen_reg_rtx (CCFPmode);
5041 ;; VSX Scalar Test Data Class Double- and Single-Precision
5042 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
5043 ;; if any of the conditions tested by operand 2 are satisfied.
5044 ;; The gt and unordered bits are cleared to zero.)
5045 (define_expand "xststdc<sd>p"
5049 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5050 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5053 (set (match_operand:SI 0 "register_operand" "=r")
5054 (eq:SI (match_dup 3)
5058 operands[3] = gen_reg_rtx (CCFPmode);
5059 operands[4] = CONST0_RTX (SImode);
5062 ;; The VSX Scalar Test Negative Quad-Precision
5063 (define_expand "xststdcnegqp_<mode>"
5067 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5071 (set (match_operand:SI 0 "register_operand" "=r")
5072 (lt:SI (match_dup 2)
5076 operands[2] = gen_reg_rtx (CCFPmode);
5079 ;; The VSX Scalar Test Negative Double- and Single-Precision
5080 (define_expand "xststdcneg<sd>p"
5084 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5088 (set (match_operand:SI 0 "register_operand" "=r")
5089 (lt:SI (match_dup 2)
5093 operands[2] = gen_reg_rtx (CCFPmode);
5094 operands[3] = CONST0_RTX (SImode);
5097 (define_insn "*xststdcqp_<mode>"
5098 [(set (match_operand:CCFP 0 "" "=y")
5101 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5102 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5106 "xststdcqp %0,%1,%2"
5107 [(set_attr "type" "fpcompare")])
5109 (define_insn "*xststdc<sd>p"
5110 [(set (match_operand:CCFP 0 "" "=y")
5112 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5113 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5115 (match_operand:SI 3 "zero_constant" "j")))]
5117 "xststdc<sd>p %0,%x1,%2"
5118 [(set_attr "type" "fpcompare")])
5120 ;; VSX Vector Extract Exponent Double and Single Precision
5121 (define_insn "xvxexp<sd>p"
5122 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5124 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5127 "xvxexp<sd>p %x0,%x1"
5128 [(set_attr "type" "vecsimple")])
5130 ;; VSX Vector Extract Significand Double and Single Precision
5131 (define_insn "xvxsig<sd>p"
5132 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5134 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5137 "xvxsig<sd>p %x0,%x1"
5138 [(set_attr "type" "vecsimple")])
5140 ;; VSX Vector Insert Exponent Double and Single Precision
5141 (define_insn "xviexp<sd>p"
5142 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5144 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5145 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5148 "xviexp<sd>p %x0,%x1,%x2"
5149 [(set_attr "type" "vecsimple")])
5151 ;; VSX Vector Test Data Class Double and Single Precision
5152 ;; The corresponding elements of the result vector are all ones
5153 ;; if any of the conditions tested by operand 3 are satisfied.
5154 (define_insn "xvtstdc<sd>p"
5155 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5157 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5158 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5159 UNSPEC_VSX_VTSTDC))]
5161 "xvtstdc<sd>p %x0,%x1,%2"
5162 [(set_attr "type" "vecsimple")])
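;; As a rough illustration (mask encoding assumed): the vector test data class
;; pattern above backs vec_test_data_class, e.g. testing each element for NaN.
;;
;;   #include <altivec.h>
;;
;;   vector bool int
;;   is_nan (vector float v)
;;   {
;;     return vec_test_data_class (v, 0x40);   /* 0x40 assumed to be the NaN class bit */
;;   }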
5164 ;; ISA 3.0 String Operations Support
5166 ;; Compare vectors producing a vector result and a predicate, setting CR6
5167 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
5168 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes; there is no need
5169 ;; to match those modes because they are expanded to use Power8 instructions.
5171 (define_insn "*vsx_ne_<mode>_p"
5172 [(set (reg:CC CR6_REGNO)
5174 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5175 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5177 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5178 (ne:VSX_EXTRACT_I (match_dup 1)
5181 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5182 [(set_attr "type" "vecsimple")])
5184 (define_insn "*vector_nez_<mode>_p"
5185 [(set (reg:CC CR6_REGNO)
5186 (unspec:CC [(unspec:VI
5187 [(match_operand:VI 1 "gpc_reg_operand" "v")
5188 (match_operand:VI 2 "gpc_reg_operand" "v")]
5191 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5192 (unspec:VI [(match_dup 1)
5196 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5197 [(set_attr "type" "vecsimple")])
5199 ;; Return first position of match between vectors using natural order
5200 ;; for both LE and BE execution modes.
5201 (define_expand "first_match_index_<mode>"
5202 [(match_operand:SI 0 "register_operand")
5203 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5204 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5205 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5210 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5211 rtx not_result = gen_reg_rtx (<MODE>mode);
5213 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5215 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5217 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5219 if (<MODE>mode == V16QImode)
5221 if (!BYTES_BIG_ENDIAN)
5222 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5224 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5228 rtx tmp = gen_reg_rtx (SImode);
5229 if (!BYTES_BIG_ENDIAN)
5230 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5232 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5233 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5238 ;; Return first position of match between vectors or end of string (EOS) using
5239 ;; natural element order for both LE and BE execution modes.
5240 (define_expand "first_match_or_eos_index_<mode>"
5241 [(match_operand:SI 0 "register_operand")
5242 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5243 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5244 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5248 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5249 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5250 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5251 rtx and_result = gen_reg_rtx (<MODE>mode);
5252 rtx result = gen_reg_rtx (<MODE>mode);
5253 rtx vzero = gen_reg_rtx (<MODE>mode);
5255 /* Vector with zeros in elements that correspond to zeros in operands. */
5256 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5257 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5258 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5259 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5261 /* Vector with ones in elements that do not match.  */
5262 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5265 /* Create vector with ones in elements where there was a zero in one of
5266 the source elements or where the elements match.  */
5267 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5268 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5270 if (<MODE>mode == V16QImode)
5272 if (!BYTES_BIG_ENDIAN)
5273 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5275 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5279 rtx tmp = gen_reg_rtx (SImode);
5280 if (!BYTES_BIG_ENDIAN)
5281 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5283 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5284 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5289 ;; Return first position of mismatch between vectors using natural
5290 ;; element order for both LE and BE execution modes.
5291 (define_expand "first_mismatch_index_<mode>"
5292 [(match_operand:SI 0 "register_operand")
5293 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5294 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5295 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5299 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5301 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5303 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5305 if (<MODE>mode == V16QImode)
5307 if (!BYTES_BIG_ENDIAN)
5308 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5310 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5314 rtx tmp = gen_reg_rtx (SImode);
5315 if (!BYTES_BIG_ENDIAN)
5316 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5318 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5319 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5324 ;; Return first position of mismatch between vectors or end of string (EOS)
5325 ;; using natural element order for both LE and BE execution modes.
5326 (define_expand "first_mismatch_or_eos_index_<mode>"
5327 [(match_operand:SI 0 "register_operand")
5328 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5329 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5330 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5334 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5335 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5336 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5337 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5338 rtx and_result = gen_reg_rtx (<MODE>mode);
5339 rtx result = gen_reg_rtx (<MODE>mode);
5340 rtx vzero = gen_reg_rtx (<MODE>mode);
5342 /* Vector with zeros in elements that correspond to zeros in operands. */
5343 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5345 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5346 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5347 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5349 /* Vector with ones in elements that match.  */
5350 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5352 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5354 /* Create vector with ones in elements where there was a zero in one of
5355 the source elements or the elements did not match. */
5356 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5357 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5359 if (<MODE>mode == V16QImode)
5361 if (!BYTES_BIG_ENDIAN)
5362 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5364 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5368 rtx tmp = gen_reg_rtx (SImode);
5369 if (!BYTES_BIG_ENDIAN)
5370 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5372 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5373 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5378 ;; Load VSX Vector with Length
5379 (define_expand "lxvl"
5381 (ashift:DI (match_operand:DI 2 "register_operand")
5383 (set (match_operand:V16QI 0 "vsx_register_operand")
5385 [(match_operand:DI 1 "gpc_reg_operand")
5386 (mem:V16QI (match_dup 1))
5389 "TARGET_P9_VECTOR && TARGET_64BIT"
5391 operands[3] = gen_reg_rtx (DImode);
5394 (define_insn "*lxvl"
5395 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5397 [(match_operand:DI 1 "gpc_reg_operand" "b")
5398 (mem:V16QI (match_dup 1))
5399 (match_operand:DI 2 "register_operand" "r")]
5401 "TARGET_P9_VECTOR && TARGET_64BIT"
5403 [(set_attr "type" "vecload")])
5405 (define_insn "lxvll"
5406 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5407 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5408 (mem:V16QI (match_dup 1))
5409 (match_operand:DI 2 "register_operand" "r")]
5413 [(set_attr "type" "vecload")])
5415 ;; Expand for builtin xl_len_r
5416 (define_expand "xl_len_r"
5417 [(match_operand:V16QI 0 "vsx_register_operand")
5418 (match_operand:DI 1 "register_operand")
5419 (match_operand:DI 2 "register_operand")]
5422 rtx shift_mask = gen_reg_rtx (V16QImode);
5423 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5424 rtx tmp = gen_reg_rtx (DImode);
5426 emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2]));
5427 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5428 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5429 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5434 (define_insn "stxvll"
5435 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5436 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5437 (mem:V16QI (match_dup 1))
5438 (match_operand:DI 2 "register_operand" "r")]
5442 [(set_attr "type" "vecstore")])
5444 ;; Store VSX Vector with Length
5445 (define_expand "stxvl"
5447 (ashift:DI (match_operand:DI 2 "register_operand")
5449 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5451 [(match_operand:V16QI 0 "vsx_register_operand")
5452 (mem:V16QI (match_dup 1))
5455 "TARGET_P9_VECTOR && TARGET_64BIT"
5457 operands[3] = gen_reg_rtx (DImode);
5460 ;; Define optab for vector access with length vectorization exploitation.
5461 (define_expand "len_load_v16qi"
5462 [(match_operand:V16QI 0 "vlogical_operand")
5463 (match_operand:V16QI 1 "memory_operand")
5464 (match_operand:QI 2 "gpc_reg_operand")]
5465 "TARGET_P9_VECTOR && TARGET_64BIT"
5467 rtx mem = XEXP (operands[1], 0);
5468 mem = force_reg (DImode, mem);
5469 rtx len = gen_lowpart (DImode, operands[2]);
5470 emit_insn (gen_lxvl (operands[0], mem, len));
5474 (define_expand "len_store_v16qi"
5475 [(match_operand:V16QI 0 "memory_operand")
5476 (match_operand:V16QI 1 "vlogical_operand")
5477 (match_operand:QI 2 "gpc_reg_operand")
5479 "TARGET_P9_VECTOR && TARGET_64BIT"
5481 rtx mem = XEXP (operands[0], 0);
5482 mem = force_reg (DImode, mem);
5483 rtx len = gen_lowpart (DImode, operands[2]);
5484 emit_insn (gen_stxvl (operands[1], mem, len));
5488 (define_insn "*stxvl"
5489 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5491 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5492 (mem:V16QI (match_dup 1))
5493 (match_operand:DI 2 "register_operand" "r")]
5495 "TARGET_P9_VECTOR && TARGET_64BIT"
5497 [(set_attr "type" "vecstore")])
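;; As a rough illustration (assumed usage; 64-bit, ISA 3.0): lxvl/stxvl back
;; the vec_xl_len/vec_xst_len built-ins, which load and store only the first
;; n bytes of a vector.
;;
;;   #include <stddef.h>
;;   #include <altivec.h>
;;
;;   void
;;   copy_up_to_16 (unsigned char *dst, unsigned char *src, size_t n)
;;   {
;;     vector unsigned char v = vec_xl_len (src, n);   /* n <= 16 */
;;     vec_xst_len (v, dst, n);
;;   }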
5499 ;; Expand for builtin xst_len_r
5500 (define_expand "xst_len_r"
5501 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5502 (match_operand:DI 1 "register_operand" "b")
5503 (match_operand:DI 2 "register_operand" "r")]
5506 rtx shift_mask = gen_reg_rtx (V16QImode);
5507 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5508 rtx tmp = gen_reg_rtx (DImode);
5510 emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2]));
5511 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5513 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5514 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5518 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5519 (define_insn "vcmpneb"
5520 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5522 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5523 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5526 [(set_attr "type" "vecsimple")])
5528 ;; Vector Compare Not Equal or Zero Byte
5529 (define_insn "vcmpnezb"
5530 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5532 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5533 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5537 [(set_attr "type" "vecsimple")])
5539 ;; Vector Compare Not Equal or Zero Byte predicate or record-form
5540 (define_insn "vcmpnezb_p"
5541 [(set (reg:CC CR6_REGNO)
5543 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5544 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5546 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5552 "vcmpnezb. %0,%1,%2"
5553 [(set_attr "type" "vecsimple")])
5555 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5556 (define_insn "vcmpneh"
5557 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5559 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5560 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5563 [(set_attr "type" "vecsimple")])
5565 ;; Vector Compare Not Equal or Zero Half Word
5566 (define_insn "vcmpnezh"
5567 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5568 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5569 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5573 [(set_attr "type" "vecsimple")])
5575 ;; Vector Compare Not Equal Word (specified/not+eq:)
5576 (define_insn "vcmpnew"
5577 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5579 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5580 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5583 [(set_attr "type" "vecsimple")])
5585 ;; Vector Compare Not Equal or Zero Word
5586 (define_insn "vcmpnezw"
5587 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5588 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5589 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5593 [(set_attr "type" "vecsimple")])
5595 ;; Vector Count Leading Zero Least-Significant Bits Byte
5596 (define_insn "vclzlsbb_<mode>"
5597 [(set (match_operand:SI 0 "register_operand" "=r")
5599 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5603 [(set_attr "type" "vecsimple")])
5605 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5606 (define_insn "vctzlsbb_<mode>"
5607 [(set (match_operand:SI 0 "register_operand" "=r")
5609 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5613 [(set_attr "type" "vecsimple")])
5615 ;; Vector Extract Unsigned Byte Left-Indexed
5616 (define_insn "vextublx"
5617 [(set (match_operand:SI 0 "register_operand" "=r")
5619 [(match_operand:SI 1 "register_operand" "r")
5620 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5624 [(set_attr "type" "vecsimple")])
5626 ;; Vector Extract Unsigned Byte Right-Indexed
5627 (define_insn "vextubrx"
5628 [(set (match_operand:SI 0 "register_operand" "=r")
5630 [(match_operand:SI 1 "register_operand" "r")
5631 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5635 [(set_attr "type" "vecsimple")])
5637 ;; Vector Extract Unsigned Half Word Left-Indexed
5638 (define_insn "vextuhlx"
5639 [(set (match_operand:SI 0 "register_operand" "=r")
5641 [(match_operand:SI 1 "register_operand" "r")
5642 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5646 [(set_attr "type" "vecsimple")])
5648 ;; Vector Extract Unsigned Half Word Right-Indexed
5649 (define_insn "vextuhrx"
5650 [(set (match_operand:SI 0 "register_operand" "=r")
5652 [(match_operand:SI 1 "register_operand" "r")
5653 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5657 [(set_attr "type" "vecsimple")])
5659 ;; Vector Extract Unsigned Word Left-Indexed
5660 (define_insn "vextuwlx"
5661 [(set (match_operand:SI 0 "register_operand" "=r")
5663 [(match_operand:SI 1 "register_operand" "r")
5664 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5668 [(set_attr "type" "vecsimple")])
5670 ;; Vector Extract Unsigned Word Right-Indexed
5671 (define_insn "vextuwrx"
5672 [(set (match_operand:SI 0 "register_operand" "=r")
5674 [(match_operand:SI 1 "register_operand" "r")
5675 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5679 [(set_attr "type" "vecsimple")])
5681 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5682 ;; endian version needs to adjust the byte number and the V4SI element used by insert4b.
5684 (define_insn "extract4b"
5685 [(set (match_operand:V2DI 0 "vsx_register_operand")
5686 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5687 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5688 UNSPEC_XXEXTRACTUW))]
5691 if (!BYTES_BIG_ENDIAN)
5692 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5694 return "xxextractuw %x0,%x1,%2";
5697 (define_expand "insert4b"
5698 [(set (match_operand:V16QI 0 "vsx_register_operand")
5699 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5700 (match_operand:V16QI 2 "vsx_register_operand")
5701 (match_operand:QI 3 "const_0_to_12_operand")]
5705 if (!BYTES_BIG_ENDIAN)
5707 rtx op1 = operands[1];
5708 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5709 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5710 operands[1] = v4si_tmp;
5711 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5715 (define_insn "*insert4b_internal"
5716 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5717 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5718 (match_operand:V16QI 2 "vsx_register_operand" "0")
5719 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5722 "xxinsertw %x0,%x1,%3"
5723 [(set_attr "type" "vecperm")])
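;; Usage illustration (not part of the machine description): extract4b and
;; insert4b back the GCC built-ins vec_extract4b and vec_insert4b
;; (xxextractuw/xxinsertw with a byte offset of 0..12).  A minimal sketch;
;; the overload signatures are an assumption taken from the ISA 3.0
;; built-in documentation:
;;
;;	#include <altivec.h>
;;
;;	/* Copy the 4 bytes starting at byte 4 of SRC and insert them at
;;	   byte 8 of DST.  */
;;	vector unsigned char
;;	move_word (vector unsigned char src, vector unsigned char dst)
;;	{
;;	  vector unsigned long long w = vec_extract4b (src, 4);
;;	  return vec_insert4b ((vector signed int) w, dst, 8);
;;	}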
5726 ;; Generate a vector of four 32-bit float values extracted from the left
5727 ;; four elements of an eight-element vector of 16-bit float values.
5728 (define_expand "vextract_fp_from_shorth"
5729 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5730 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5731 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5735 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5736 int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
5739 rtx mask = gen_reg_rtx (V16QImode);
5740 rtx tmp = gen_reg_rtx (V16QImode);
5743 for (i = 0; i < 16; i++)
5744 if (!BYTES_BIG_ENDIAN)
5745 rvals[i] = GEN_INT (vals_le[i]);
5746 else
5747 rvals[i] = GEN_INT (vals_be[i]);
5749 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5750 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5751 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5752 conversion instruction. */
5753 v = gen_rtvec_v (16, rvals);
5754 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5755 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5756 operands[1], mask));
5757 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5761 ;; Generate a vector of four 32-bit float values extracted from the right
5762 ;; four elements of an eight-element vector of 16-bit float values.
5763 (define_expand "vextract_fp_from_shortl"
5764 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5765 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5766 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5769 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5770 int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
5774 rtx mask = gen_reg_rtx (V16QImode);
5775 rtx tmp = gen_reg_rtx (V16QImode);
5778 for (i = 0; i < 16; i++)
5779 if (!BYTES_BIG_ENDIAN)
5780 rvals[i] = GEN_INT (vals_le[i]);
5781 else
5782 rvals[i] = GEN_INT (vals_be[i]);
5784 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5785 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5786 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5787 conversion instruction. */
5788 v = gen_rtvec_v (16, rvals);
5789 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5790 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5791 operands[1], mask));
5792 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
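;; Usage illustration (not part of the machine description): these two
;; expanders back the ISA 3.0 built-ins vec_extract_fp32_from_shorth and
;; vec_extract_fp32_from_shortl, which widen half of a vector of IEEE
;; 16-bit floats to 32-bit floats via xvcvhpsp.  A minimal sketch; the
;; built-in names are taken from the GCC PowerPC built-in documentation:
;;
;;	#include <altivec.h>
;;
;;	/* Widen all eight F16 values in H into two V4SF vectors.  */
;;	void
;;	widen_f16 (vector unsigned short h, vector float *left,
;;		   vector float *right)
;;	{
;;	  *left  = vec_extract_fp32_from_shorth (h);
;;	  *right = vec_extract_fp32_from_shortl (h);
;;	}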
5796 ;; Support for ISA 3.0 vector byte reverse
5798 ;; Swap all bytes within a vector
5799 (define_insn "p9_xxbrq_v1ti"
5800 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5801 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5802 "TARGET_P9_VECTOR"
5803 "xxbrq %x0,%x1"
5804 [(set_attr "type" "vecperm")])
5806 (define_expand "p9_xxbrq_v16qi"
5807 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5808 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5811 rtx op0 = gen_reg_rtx (V1TImode);
5812 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5813 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5814 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5818 ;; Swap all bytes in each 64-bit element
5819 (define_insn "p9_xxbrd_v2di"
5820 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5821 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5822 "TARGET_P9_VECTOR"
5823 "xxbrd %x0,%x1"
5824 [(set_attr "type" "vecperm")])
5826 (define_expand "p9_xxbrd_v2df"
5827 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5828 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5831 rtx op0 = gen_reg_rtx (V2DImode);
5832 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5833 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5834 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5838 ;; Swap all bytes in each 32-bit element
5839 (define_insn "p9_xxbrw_v4si"
5840 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5841 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5842 "TARGET_P9_VECTOR"
5843 "xxbrw %x0,%x1"
5844 [(set_attr "type" "vecperm")])
5846 (define_expand "p9_xxbrw_v4sf"
5847 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5848 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5851 rtx op0 = gen_reg_rtx (V4SImode);
5852 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5853 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5854 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5858 ;; Swap all bytes in each element of vector
5859 (define_expand "revb_<mode>"
5860 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5861 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5864 if (TARGET_P9_VECTOR)
5865 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5866 else
5867 {
5868 /* Want to have the elements in reverse order relative
5869 to the endian mode in use, i.e. in LE mode, put elements
5870 in BE order.  */
5871 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5872 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5873 operands[1], sel));
5874 }
5876 DONE;
5877 })
5879 ;; Reversing bytes in vector char is just a NOP.
5880 (define_expand "revb_v16qi"
5881 [(set (match_operand:V16QI 0 "vsx_register_operand")
5882 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5885 emit_move_insn (operands[0], operands[1]);
5889 ;; Swap all bytes in each 16-bit element
5890 (define_insn "p9_xxbrh_v8hi"
5891 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5892 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5893 "TARGET_P9_VECTOR"
5894 "xxbrh %x0,%x1"
5895 [(set_attr "type" "vecperm")])
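;; Usage illustration (not part of the machine description): the revb
;; expanders and the p9_xxbr* insns above implement per-element byte
;; reversal; at the source level this is reachable through the vec_revb
;; built-in (ISA 3.0 xxbr* instructions with -mcpu=power9, a vperm
;; fallback otherwise).  A minimal sketch:
;;
;;	#include <altivec.h>
;;
;;	/* Byte-swap each 32-bit element, e.g. to convert a vector of
;;	   big-endian words loaded from a network buffer.  */
;;	vector unsigned int
;;	bswap_words (vector unsigned int v)
;;	{
;;	  return vec_revb (v);
;;	}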
5898 ;; Operand numbers for the following peephole2
5899 (define_constants
5900 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5901 (SFBOOL_TMP_VSX 1) ;; vector temporary
5902 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5903 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5904 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5905 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5906 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5907 (SFBOOL_SHL_D 7) ;; shift left dest
5908 (SFBOOL_SHL_A 8) ;; shift left arg
5909 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5910 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5911 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5912 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5913 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5915 ;; Attempt to optimize some common GLIBC operations using logical operations to
5916 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5917 ;; after macro expansion that looks like:
5918 ;;
5919 ;; typedef union {
5920 ;;   float value;
5921 ;;   unsigned int word;
5922 ;; } ieee_float_shape_type;
5926 ;;
5927 ;; do {
5928 ;;   ieee_float_shape_type gf_u;
5929 ;;   gf_u.value = (t1);
5930 ;;   (is) = gf_u.word;
5931 ;; } while (0);
5932 ;;
5933 ;; do {
5934 ;;   ieee_float_shape_type sf_u;
5935 ;;   sf_u.word = (is & 0xfffff000);
5936 ;;   (t1) = sf_u.value;
5937 ;; } while (0);
5940 ;; This would result in two direct move operations (convert to memory format,
5941 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5942 ;; scalar format). With this peephole, we eliminate the direct move to the
5943 ;; GPR, and instead move the integer mask value to the vector register after a
5944 ;; shift and do the VSX logical operation.
5946 ;; The insns for dealing with SFmode in GPR registers look like:
5947 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5949 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5951 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5953 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5955 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5957 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
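;; For reference (not part of the machine description), the macro-expanded
;; code above is the glibc GET_FLOAT_WORD/SET_FLOAT_WORD idiom; a
;; stand-alone C sketch of the same source-level pattern, assuming nothing
;; beyond ISO C, would be:
;;
;;	static inline float
;;	mask_sf_bits (float t1)
;;	{
;;	  union { float value; unsigned int word; } u;
;;	  unsigned int is;
;;	  u.value = t1;			/* GET_FLOAT_WORD (is, t1)  */
;;	  is = u.word;
;;	  u.word = is & 0xfffff000;	/* SET_FLOAT_WORD (t1, is & mask)  */
;;	  return u.value;
;;	}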
5959 (define_peephole2
5960 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5961 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5963 ;; MFVSRWZ (aka zero_extend)
5964 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5965 (zero_extend:DI
5966 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5968 ;; AND/IOR/XOR operation on int
5969 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5970 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5971 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5974 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5975 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5976 (const_int 32)))
5979 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5980 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5982 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5983 /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5984 to compare registers when the modes are different.  */
5985 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5986 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5987 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5988 && (REG_P (operands[SFBOOL_BOOL_A2])
5989 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5990 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5991 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5992 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5993 || (REG_P (operands[SFBOOL_BOOL_A2])
5994 && REGNO (operands[SFBOOL_MFVSR_D])
5995 == REGNO (operands[SFBOOL_BOOL_A2])))
5996 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5997 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5998 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5999 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
6000 [(set (match_dup SFBOOL_TMP_GPR)
6001 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
6002 (const_int 32)))
6004 (set (match_dup SFBOOL_TMP_VSX_DI)
6005 (match_dup SFBOOL_TMP_GPR))
6007 (set (match_dup SFBOOL_MTVSR_D_V4SF)
6008 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
6009 (match_dup SFBOOL_TMP_VSX)))]
6011 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
6012 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
6013 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
6014 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
6015 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
6016 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
6018 if (CONST_INT_P (bool_a2))
6020 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
6021 emit_move_insn (tmp_gpr, bool_a2);
6022 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
6026 int regno_bool_a1 = REGNO (bool_a1);
6027 int regno_bool_a2 = REGNO (bool_a2);
6028 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
6029 ? regno_bool_a2 : regno_bool_a1);
6030 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
6033 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
6034 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
6035 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
6038 ;; Support signed/unsigned long long to float conversion vectorization.
6039 ;; Note that any_float (pc) here is just for code attribute <su>.
6040 (define_expand "vec_pack<su>_float_v2di"
6041 [(match_operand:V4SF 0 "vfloat_operand")
6042 (match_operand:V2DI 1 "vint_operand")
6043 (match_operand:V2DI 2 "vint_operand")
6047 rtx r1 = gen_reg_rtx (V4SFmode);
6048 rtx r2 = gen_reg_rtx (V4SFmode);
6049 emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
6050 emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
6051 rs6000_expand_extract_even (operands[0], r1, r2);
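;; Usage illustration (not part of the machine description): this expander
;; lets the vectorizer narrow two V2DI vectors of 64-bit integers into one
;; V4SF vector of floats.  A plain C loop of this shape is the kind of
;; code it is meant to vectorize (a sketch, nothing GCC-specific assumed):
;;
;;	void
;;	ll_to_float (float *restrict out, const long long *restrict in, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    out[i] = (float) in[i];	/* xvcvsxdsp + extract-even  */
;;	}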
6055 ;; Support float to signed/unsigned long long conversion vectorization.
6056 ;; Note that any_fix (pc) here is just for code attribute <su>.
6057 (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
6058 [(match_operand:V2DI 0 "vint_operand")
6059 (match_operand:V4SF 1 "vfloat_operand")
6063 rtx reg = gen_reg_rtx (V4SFmode);
6064 rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
6065 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
6069 ;; Note that any_fix (pc) here is just for code attribute <su>.
6070 (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
6071 [(match_operand:V2DI 0 "vint_operand")
6072 (match_operand:V4SF 1 "vfloat_operand")
6076 rtx reg = gen_reg_rtx (V4SFmode);
6077 rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
6078 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
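;; Usage illustration (not part of the machine description): the hi/lo
;; unpack expanders let the vectorizer widen a V4SF vector into two V2DI
;; vectors of truncated integer results.  A plain C loop of this shape is
;; the target (a sketch, nothing GCC-specific assumed):
;;
;;	void
;;	float_to_ll (long long *restrict out, const float *restrict in, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    out[i] = (long long) in[i];	/* interleave + xvcvspsxds  */
;;	}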
6082 (define_insn "vsx_<xvcvbf16>"
6083 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
6084 (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
6087 "<xvcvbf16> %x0,%x1"
6088 [(set_attr "type" "vecfloat")])
6090 (define_insn "vec_mtvsrbmi"
6091 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
6092 (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
6098 (define_insn "vec_mtvsr_<mode>"
6099 [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
6100 (unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
6104 [(set_attr "type" "vecsimple")])
6106 (define_insn "vec_cntmb_<mode>"
6107 [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
6108 (unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
6109 (match_operand:QI 2 "const_0_to_1_operand" "n")]
6112 "vcntmb<wd> %0,%1,%2"
6113 [(set_attr "type" "vecsimple")])
6115 (define_insn "vec_extract_<mode>"
6116 [(set (match_operand:SI 0 "register_operand" "=r")
6117 (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
6120 "vextract<wd>m %0,%1"
6121 [(set_attr "type" "vecsimple")])
6123 (define_insn "vec_expand_<mode>"
6124 [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
6125 (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
6128 "vexpand<wd>m %0,%1"
6129 [(set_attr "type" "vecsimple")])
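;; Usage illustration (not part of the machine description): the mtvsrbm /
;; vcntmb / vextract*m / vexpand*m patterns above back the ISA 3.1 vector
;; mask built-ins.  A minimal sketch with -mcpu=power10; the names
;; vec_genbm, vec_expandm and vec_cntm follow the Power Vector Intrinsic
;; Programming Reference and are an assumption here:
;;
;;	#include <altivec.h>
;;
;;	/* Build a byte-mask vector from a 16-bit mask, expand each mask
;;	   bit to a full 0x00/0xff byte, then count the set mask bytes.  */
;;	unsigned long long
;;	mask_demo (unsigned long long bits)
;;	{
;;	  vector unsigned char m = vec_genbm (bits);	/* mtvsrbm    */
;;	  m = vec_expandm (m);				/* vexpandbm  */
;;	  return vec_cntm (m, 1);			/* vcntmb     */
;;	}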
6131 (define_insn "dives_<mode>"
6132 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6133 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6134 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6137 "vdives<wd> %0,%1,%2"
6138 [(set_attr "type" "vecdiv")
6139 (set_attr "size" "<bits>")])
6141 (define_insn "diveu_<mode>"
6142 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6143 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6144 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6147 "vdiveu<wd> %0,%1,%2"
6148 [(set_attr "type" "vecdiv")
6149 (set_attr "size" "<bits>")])
6151 (define_insn "div<mode>3"
6152 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6153 (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6154 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6156 "vdivs<wd> %0,%1,%2"
6157 [(set_attr "type" "vecdiv")
6158 (set_attr "size" "<bits>")])
6160 (define_insn "udiv<mode>3"
6161 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6162 (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6163 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6165 "vdivu<wd> %0,%1,%2"
6166 [(set_attr "type" "vecdiv")
6167 (set_attr "size" "<bits>")])
6169 (define_insn "mods_<mode>"
6170 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6171 (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6172 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6174 "vmods<wd> %0,%1,%2"
6175 [(set_attr "type" "vecdiv")
6176 (set_attr "size" "<bits>")])
6178 (define_insn "modu_<mode>"
6179 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6180 (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6181 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6183 "vmodu<wd> %0,%1,%2"
6184 [(set_attr "type" "vecdiv")
6185 (set_attr "size" "<bits>")])
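;; Usage illustration (not part of the machine description): with
;; -mcpu=power10 the element-wise divide and modulo patterns above can be
;; reached through the vec_div and vec_mod built-ins (or through plain
;; vector arithmetic).  A minimal sketch; vec_mod on 64-bit elements is an
;; ISA 3.1 addition, so its availability is an assumption here:
;;
;;	#include <altivec.h>
;;
;;	vector signed long long
;;	div_mod (vector signed long long a, vector signed long long b,
;;		 vector signed long long *rem)
;;	{
;;	  *rem = vec_mod (a, b);	/* vmodsd  */
;;	  return vec_div (a, b);	/* vdivsd  */
;;	}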
6187 (define_insn "mulhs_<mode>"
6188 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6189 (mult:VIlong (ashiftrt
6190 (match_operand:VIlong 1 "vsx_register_operand" "v")
6193 (match_operand:VIlong 2 "vsx_register_operand" "v")
6196 "vmulhs<wd> %0,%1,%2"
6197 [(set_attr "type" "veccomplex")])
6199 (define_insn "mulhu_<mode>"
6200 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6201 (us_mult:VIlong (ashiftrt
6202 (match_operand:VIlong 1 "vsx_register_operand" "v")
6205 (match_operand:VIlong 2 "vsx_register_operand" "v")
6208 "vmulhu<wd> %0,%1,%2"
6209 [(set_attr "type" "veccomplex")])
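;; Usage illustration (not part of the machine description): mulhs/mulhu
;; return the high half of the double-width product of each element pair.
;; With -mcpu=power10 this is reachable through the vec_mulh built-in; the
;; overload used below is an assumption based on the ISA 3.1 built-in
;; documentation:
;;
;;	#include <altivec.h>
;;
;;	/* High 64 bits of each 128-bit product a[i] * b[i].  */
;;	vector signed long long
;;	mul_high (vector signed long long a, vector signed long long b)
;;	{
;;	  return vec_mulh (a, b);	/* vmulhsd  */
;;	}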
6211 ;; Vector multiply low double word
6212 (define_insn "mulv2di3"
6213 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
6214 (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v")
6215 (match_operand:V2DI 2 "vsx_register_operand" "v")))]
6216 "TARGET_POWER10"
6217 "vmulld %0,%1,%2"
6218 [(set_attr "type" "veccomplex")])
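;; Usage illustration (not part of the machine description): the low-part
;; doubleword multiply is what ordinary 64-bit element multiplication
;; lowers to (vmulld on power10).  A plain C sketch the vectorizer can map
;; onto it:
;;
;;	void
;;	mul_i64 (long long *restrict out, const long long *restrict a,
;;		 const long long *restrict b, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    out[i] = a[i] * b[i];
;;	}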