2 ;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for 128-bit integer types that go in a single vector register.
41 (define_mode_iterator VSX_TI [TI V1TI])
43 ;; Iterator for the 2 32-bit vector types
44 (define_mode_iterator VSX_W [V4SF V4SI])
46 ;; Iterator for the DF types
47 (define_mode_iterator VSX_DF [V2DF DF])
49 ;; Iterator for vector floating point types supported by VSX
50 (define_mode_iterator VSX_F [V4SF V2DF])
52 ;; Iterator for logical types supported by VSX
53 (define_mode_iterator VSX_L [V16QI
61 (KF "FLOAT128_VECTOR_P (KFmode)")
62 (TF "FLOAT128_VECTOR_P (TFmode)")])
64 ;; Iterator for memory moves.
65 (define_mode_iterator VSX_M [V16QI
72 (KF "FLOAT128_VECTOR_P (KFmode)")
73 (TF "FLOAT128_VECTOR_P (TFmode)")
76 (define_mode_attr VSX_XXBR [(V8HI "h")
83 ;; Map into the appropriate load/store name based on the type
84 (define_mode_attr VSm [(V16QI "vw4")
96 ;; Map the register class used
97 (define_mode_attr VSr [(V16QI "v")
111 ;; What value we need in the "isa" field, to make the IEEE QP float work.
112 (define_mode_attr VSisa [(V16QI "*")
126 ;; A mode attribute to disparage use of GPR registers, except for scalar integer modes.
128 (define_mode_attr ??r [(V16QI "??r")
139 ;; A mode attribute used for 128-bit constant values.
140 (define_mode_attr nW [(V16QI "W")
151 ;; Same size integer type for floating point data
152 (define_mode_attr VSi [(V4SF "v4si")
156 (define_mode_attr VSI [(V4SF "V4SI")
160 ;; Word size for same size conversion
161 (define_mode_attr VSc [(V4SF "w")
165 ;; Map into either s or v, depending on whether this is a scalar or vector
167 (define_mode_attr VSv [(V16QI "v")
177 ;; Appropriate type for add ops (and other simple FP ops)
178 (define_mode_attr VStype_simple [(V2DF "vecdouble")
182 ;; Appropriate type for multiply ops
183 (define_mode_attr VStype_mul [(V2DF "vecdouble")
187 ;; Appropriate type for divide ops.
188 (define_mode_attr VStype_div [(V2DF "vecdiv")
192 ;; Map the scalar mode for a vector type
193 (define_mode_attr VS_scalar [(V1TI "TI")
201 ;; Map to a double-sized vector mode
202 (define_mode_attr VS_double [(V4SI "V8SI")
208 ;; Iterators for loading constants with xxspltib
209 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
210 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
212 ;; Vector reverse byte modes
213 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
215 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
216 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
217 ;; done on ISA 2.07 and not just ISA 3.0.
218 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
219 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
221 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
225 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
226 ;; insert to validate the operand number.
227 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
228 (V8HI "const_0_to_7_operand")
229 (V4SI "const_0_to_3_operand")])
231 ;; Mode attribute to give the constraint for vector extract and insert
233 (define_mode_attr VSX_EX [(V16QI "v")
237 ;; Mode iterator for binary floating types other than double to
238 ;; optimize converting to that floating point type from an extract
239 ;; of an integer type
240 (define_mode_iterator VSX_EXTRACT_FL [SF
241 (IF "FLOAT128_2REG_P (IFmode)")
242 (KF "TARGET_FLOAT128_HW")
243 (TF "FLOAT128_2REG_P (TFmode)
244 || (FLOAT128_IEEE_P (TFmode)
245 && TARGET_FLOAT128_HW)")])
247 ;; Mode iterator for binary floating types that have a direct conversion
248 ;; from 64-bit integer to floating point
249 (define_mode_iterator FL_CONV [SF
251 (KF "TARGET_FLOAT128_HW")
252 (TF "TARGET_FLOAT128_HW
253 && FLOAT128_IEEE_P (TFmode)")])
255 ;; Iterator for the 2 short vector types to do a splat from an integer
256 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
258 ;; Mode attribute to give the count for the splat instruction to splat
259 ;; the value in the 64-bit integer slot
260 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
262 ;; Mode attribute to give the suffix for the splat instruction
263 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
265 ;; Constants for creating unspecs
266 (define_c_enum "unspec"
283 UNSPEC_VSX_UNS_FLOAT2
285 UNSPEC_VSX_UNS_FLOATE
287 UNSPEC_VSX_UNS_FLOATO
303 UNSPEC_VSX_XVCVDPSXDS
305 UNSPEC_VSX_XVCVDPUXDS
306 UNSPEC_VSX_SIGN_EXTEND
307 UNSPEC_VSX_XVCVSPSXWS
308 UNSPEC_VSX_XVCVSPSXDS
318 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
319 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
351 UNSPEC_VSX_FIRST_MATCH_INDEX
352 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
353 UNSPEC_VSX_FIRST_MISMATCH_INDEX
354 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
359 ;; The patterns for LE permuted loads and stores come before the general
360 ;; VSX moves so they match first.
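;; Illustrative sketch only (not an additional pattern): without ISA 3.0, a
;; little-endian V2DF/V2DI load through this pattern is expected to expand to
;; a doubleword load plus a doubleword swap, roughly
;;	lxvd2x   vs0,0,r3	; elements arrive doubleword-swapped
;;	xxpermdi vs0,vs0,vs0,2	; swap the doublewords back into element order
;; unless the split below can instead use lvx on a 16-byte-aligned address and
;; avoid the swap entirely.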
361 (define_insn_and_split "*vsx_le_perm_load_<mode>"
362 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
363 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
364 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
370 (parallel [(const_int 1) (const_int 0)])))
374 (parallel [(const_int 1) (const_int 0)])))]
376 rtx mem = operands[1];
378 /* Don't apply the swap optimization if we've already performed register
379 allocation and the hard register destination is not in the altivec range.  */
381 if ((MEM_ALIGN (mem) >= 128)
382 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
383 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
385 rtx mem_address = XEXP (mem, 0);
386 enum machine_mode mode = GET_MODE (mem);
388 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
390 /* Replace the source memory address with masked address. */
391 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
392 emit_insn (lvx_set_expr);
395 else if (rs6000_quadword_masked_address_p (mem_address))
397 /* This rtl is already in the form that matches the lvx
398 instruction, so leave it alone. */
401 /* Otherwise, fall through to transform into a swapping load. */
403 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
406 [(set_attr "type" "vecload")
407 (set_attr "length" "8")])
409 (define_insn_and_split "*vsx_le_perm_load_<mode>"
410 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
411 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
412 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
418 (parallel [(const_int 2) (const_int 3)
419 (const_int 0) (const_int 1)])))
423 (parallel [(const_int 2) (const_int 3)
424 (const_int 0) (const_int 1)])))]
426 rtx mem = operands[1];
428 /* Don't apply the swap optimization if we've already performed register
429 allocation and the hard register destination is not in the altivec range.  */
431 if ((MEM_ALIGN (mem) >= 128)
432 && (!HARD_REGISTER_P (operands[0])
433 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
435 rtx mem_address = XEXP (mem, 0);
436 enum machine_mode mode = GET_MODE (mem);
438 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
440 /* Replace the source memory address with masked address. */
441 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
442 emit_insn (lvx_set_expr);
445 else if (rs6000_quadword_masked_address_p (mem_address))
447 /* This rtl is already in the form that matches the lvx
448 instruction, so leave it alone. */
451 /* Otherwise, fall through to transform into a swapping load. */
453 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
456 [(set_attr "type" "vecload")
457 (set_attr "length" "8")])
459 (define_insn_and_split "*vsx_le_perm_load_v8hi"
460 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
461 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
462 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
468 (parallel [(const_int 4) (const_int 5)
469 (const_int 6) (const_int 7)
470 (const_int 0) (const_int 1)
471 (const_int 2) (const_int 3)])))
475 (parallel [(const_int 4) (const_int 5)
476 (const_int 6) (const_int 7)
477 (const_int 0) (const_int 1)
478 (const_int 2) (const_int 3)])))]
480 rtx mem = operands[1];
482 /* Don't apply the swap optimization if we've already performed register
483 allocation and the hard register destination is not in the altivec range.  */
485 if ((MEM_ALIGN (mem) >= 128)
486 && (!HARD_REGISTER_P (operands[0])
487 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
489 rtx mem_address = XEXP (mem, 0);
490 enum machine_mode mode = GET_MODE (mem);
492 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
494 /* Replace the source memory address with masked address. */
495 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
496 emit_insn (lvx_set_expr);
499 else if (rs6000_quadword_masked_address_p (mem_address))
501 /* This rtl is already in the form that matches the lvx
502 instruction, so leave it alone. */
505 /* Otherwise, fall through to transform into a swapping load. */
507 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
510 [(set_attr "type" "vecload")
511 (set_attr "length" "8")])
513 (define_insn_and_split "*vsx_le_perm_load_v16qi"
514 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
515 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
516 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
522 (parallel [(const_int 8) (const_int 9)
523 (const_int 10) (const_int 11)
524 (const_int 12) (const_int 13)
525 (const_int 14) (const_int 15)
526 (const_int 0) (const_int 1)
527 (const_int 2) (const_int 3)
528 (const_int 4) (const_int 5)
529 (const_int 6) (const_int 7)])))
533 (parallel [(const_int 8) (const_int 9)
534 (const_int 10) (const_int 11)
535 (const_int 12) (const_int 13)
536 (const_int 14) (const_int 15)
537 (const_int 0) (const_int 1)
538 (const_int 2) (const_int 3)
539 (const_int 4) (const_int 5)
540 (const_int 6) (const_int 7)])))]
542 rtx mem = operands[1];
544 /* Don't apply the swap optimization if we've already performed register
545 allocation and the hard register destination is not in the altivec range.  */
547 if ((MEM_ALIGN (mem) >= 128)
548 && (!HARD_REGISTER_P (operands[0])
549 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
551 rtx mem_address = XEXP (mem, 0);
552 enum machine_mode mode = GET_MODE (mem);
554 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
556 /* Replace the source memory address with masked address. */
557 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
558 emit_insn (lvx_set_expr);
561 else if (rs6000_quadword_masked_address_p (mem_address))
563 /* This rtl is already in the form that matches the lvx
564 instruction, so leave it alone. */
567 /* Otherwise, fall through to transform into a swapping load. */
569 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
572 [(set_attr "type" "vecload")
573 (set_attr "length" "8")])
575 (define_insn "*vsx_le_perm_store_<mode>"
576 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
577 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
578 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
580 [(set_attr "type" "vecstore")
581 (set_attr "length" "12")])
584 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
585 (match_operand:VSX_D 1 "vsx_register_operand"))]
586 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
590 (parallel [(const_int 1) (const_int 0)])))
594 (parallel [(const_int 1) (const_int 0)])))]
596 rtx mem = operands[0];
598 /* Don't apply the swap optimization if we've already performed register
599 allocation and the hard register source is not in the altivec range. */
600 if ((MEM_ALIGN (mem) >= 128)
601 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
602 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
604 rtx mem_address = XEXP (mem, 0);
605 enum machine_mode mode = GET_MODE (mem);
606 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
608 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
609 emit_insn (stvx_set_expr);
612 else if (rs6000_quadword_masked_address_p (mem_address))
614 /* This rtl is already in the form that matches the stvx instruction,
615 so leave it alone. */
618 /* Otherwise, fall through to transform into a swapping store. */
621 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
625 ;; The post-reload split requires that we re-permute the source
626 ;; register in case it is still live.
628 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
629 (match_operand:VSX_D 1 "vsx_register_operand"))]
630 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
634 (parallel [(const_int 1) (const_int 0)])))
638 (parallel [(const_int 1) (const_int 0)])))
642 (parallel [(const_int 1) (const_int 0)])))]
645 (define_insn "*vsx_le_perm_store_<mode>"
646 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
647 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
648 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
650 [(set_attr "type" "vecstore")
651 (set_attr "length" "12")])
654 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
655 (match_operand:VSX_W 1 "vsx_register_operand"))]
656 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
660 (parallel [(const_int 2) (const_int 3)
661 (const_int 0) (const_int 1)])))
665 (parallel [(const_int 2) (const_int 3)
666 (const_int 0) (const_int 1)])))]
668 rtx mem = operands[0];
670 /* Don't apply the swap optimization if we've already performed register
671 allocation and the hard register source is not in the altivec range. */
672 if ((MEM_ALIGN (mem) >= 128)
673 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
674 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
676 rtx mem_address = XEXP (mem, 0);
677 enum machine_mode mode = GET_MODE (mem);
678 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
680 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
681 emit_insn (stvx_set_expr);
684 else if (rs6000_quadword_masked_address_p (mem_address))
686 /* This rtl is already in the form that matches the stvx instruction,
687 so leave it alone. */
690 /* Otherwise, fall through to transform into a swapping store. */
693 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
697 ;; The post-reload split requires that we re-permute the source
698 ;; register in case it is still live.
700 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
701 (match_operand:VSX_W 1 "vsx_register_operand"))]
702 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
706 (parallel [(const_int 2) (const_int 3)
707 (const_int 0) (const_int 1)])))
711 (parallel [(const_int 2) (const_int 3)
712 (const_int 0) (const_int 1)])))
716 (parallel [(const_int 2) (const_int 3)
717 (const_int 0) (const_int 1)])))]
720 (define_insn "*vsx_le_perm_store_v8hi"
721 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
722 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
723 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
725 [(set_attr "type" "vecstore")
726 (set_attr "length" "12")])
729 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
730 (match_operand:V8HI 1 "vsx_register_operand"))]
731 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
735 (parallel [(const_int 4) (const_int 5)
736 (const_int 6) (const_int 7)
737 (const_int 0) (const_int 1)
738 (const_int 2) (const_int 3)])))
742 (parallel [(const_int 4) (const_int 5)
743 (const_int 6) (const_int 7)
744 (const_int 0) (const_int 1)
745 (const_int 2) (const_int 3)])))]
747 rtx mem = operands[0];
749 /* Don't apply the swap optimization if we've already performed register
750 allocation and the hard register source is not in the altivec range. */
751 if ((MEM_ALIGN (mem) >= 128)
752 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
753 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
755 rtx mem_address = XEXP (mem, 0);
756 enum machine_mode mode = GET_MODE (mem);
757 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
759 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
760 emit_insn (stvx_set_expr);
763 else if (rs6000_quadword_masked_address_p (mem_address))
765 /* This rtl is already in the form that matches the stvx instruction,
766 so leave it alone. */
769 /* Otherwise, fall through to transform into a swapping store. */
772 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
776 ;; The post-reload split requires that we re-permute the source
777 ;; register in case it is still live.
779 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
780 (match_operand:V8HI 1 "vsx_register_operand"))]
781 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
785 (parallel [(const_int 4) (const_int 5)
786 (const_int 6) (const_int 7)
787 (const_int 0) (const_int 1)
788 (const_int 2) (const_int 3)])))
792 (parallel [(const_int 4) (const_int 5)
793 (const_int 6) (const_int 7)
794 (const_int 0) (const_int 1)
795 (const_int 2) (const_int 3)])))
799 (parallel [(const_int 4) (const_int 5)
800 (const_int 6) (const_int 7)
801 (const_int 0) (const_int 1)
802 (const_int 2) (const_int 3)])))]
805 (define_insn "*vsx_le_perm_store_v16qi"
806 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
807 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
808 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
810 [(set_attr "type" "vecstore")
811 (set_attr "length" "12")])
814 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
815 (match_operand:V16QI 1 "vsx_register_operand"))]
816 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
820 (parallel [(const_int 8) (const_int 9)
821 (const_int 10) (const_int 11)
822 (const_int 12) (const_int 13)
823 (const_int 14) (const_int 15)
824 (const_int 0) (const_int 1)
825 (const_int 2) (const_int 3)
826 (const_int 4) (const_int 5)
827 (const_int 6) (const_int 7)])))
831 (parallel [(const_int 8) (const_int 9)
832 (const_int 10) (const_int 11)
833 (const_int 12) (const_int 13)
834 (const_int 14) (const_int 15)
835 (const_int 0) (const_int 1)
836 (const_int 2) (const_int 3)
837 (const_int 4) (const_int 5)
838 (const_int 6) (const_int 7)])))]
840 rtx mem = operands[0];
842 /* Don't apply the swap optimization if we've already performed register
843 allocation and the hard register source is not in the altivec range. */
844 if ((MEM_ALIGN (mem) >= 128)
845 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
846 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
848 rtx mem_address = XEXP (mem, 0);
849 enum machine_mode mode = GET_MODE (mem);
850 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
852 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
853 emit_insn (stvx_set_expr);
856 else if (rs6000_quadword_masked_address_p (mem_address))
858 /* This rtl is already in the form that matches the stvx instruction,
859 so leave it alone. */
862 /* Otherwise, fall through to transform into a swapping store. */
865 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
869 ;; The post-reload split requires that we re-permute the source
870 ;; register in case it is still live.
872 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
873 (match_operand:V16QI 1 "vsx_register_operand"))]
874 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
878 (parallel [(const_int 8) (const_int 9)
879 (const_int 10) (const_int 11)
880 (const_int 12) (const_int 13)
881 (const_int 14) (const_int 15)
882 (const_int 0) (const_int 1)
883 (const_int 2) (const_int 3)
884 (const_int 4) (const_int 5)
885 (const_int 6) (const_int 7)])))
889 (parallel [(const_int 8) (const_int 9)
890 (const_int 10) (const_int 11)
891 (const_int 12) (const_int 13)
892 (const_int 14) (const_int 15)
893 (const_int 0) (const_int 1)
894 (const_int 2) (const_int 3)
895 (const_int 4) (const_int 5)
896 (const_int 6) (const_int 7)])))
900 (parallel [(const_int 8) (const_int 9)
901 (const_int 10) (const_int 11)
902 (const_int 12) (const_int 13)
903 (const_int 14) (const_int 15)
904 (const_int 0) (const_int 1)
905 (const_int 2) (const_int 3)
906 (const_int 4) (const_int 5)
907 (const_int 6) (const_int 7)])))]
910 ;; Little endian word swapping for 128-bit types that are either scalars or the
911 ;; special V1TI container class, for which it is not appropriate to use vec_select.
913 (define_insn "*vsx_le_permute_<mode>"
914 [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
916 (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
918 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
920 xxpermdi %x0,%x1,%x1,2
924 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
925 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
926 [(set_attr "length" "*,*,*,8,8,8")
927 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
929 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
930 [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
933 (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
936 "!BYTES_BIG_ENDIAN && TARGET_VSX"
941 [(set (match_dup 0) (match_dup 1))]
943 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
945 emit_note (NOTE_INSN_DELETED);
949 [(set_attr "length" "0,4")
950 (set_attr "type" "veclogical")])
952 (define_insn_and_split "*vsx_le_perm_load_<mode>"
953 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
954 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
955 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
959 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
962 rtx tmp = (can_create_pseudo_p ()
963 ? gen_reg_rtx_and_attrs (operands[0])
965 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
966 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
969 [(set_attr "type" "vecload,load")
970 (set_attr "length" "8,8")
971 (set_attr "isa" "<VSisa>,*")])
973 (define_insn "*vsx_le_perm_store_<mode>"
974 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
975 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
976 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
980 [(set_attr "type" "vecstore,store")
981 (set_attr "length" "12,8")
982 (set_attr "isa" "<VSisa>,*")])
985 [(set (match_operand:VSX_LE_128 0 "memory_operand")
986 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
987 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
990 rtx tmp = (can_create_pseudo_p ()
991 ? gen_reg_rtx_and_attrs (operands[0])
993 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
994 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
998 ;; Peepholes to catch loads and stores for TImode if TImode landed in
999 ;; GPR registers on a little endian system.
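;; Illustrative sketch of the first peephole below (assumed RTL, exposition
;; only): a TImode load expressed as a rotate of the memory value followed by
;; a rotate back again, i.e. roughly
;;	(set (reg:TI 3) (rotate:TI (mem:TI ...) (const_int 64)))
;;	(set (reg:TI 5) (rotate:TI (reg:TI 3) (const_int 64)))
;; collapses into a single direct move when the intermediate register is dead
;; or both destinations are the same register.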
1001 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1002 (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1004 (set (match_operand:VSX_TI 2 "int_reg_operand")
1005 (rotate:VSX_TI (match_dup 0)
1007 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1008 && (rtx_equal_p (operands[0], operands[2])
1009 || peep2_reg_dead_p (2, operands[0]))"
1010 [(set (match_dup 2) (match_dup 1))])
1013 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1014 (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1016 (set (match_operand:VSX_TI 2 "memory_operand")
1017 (rotate:VSX_TI (match_dup 0)
1019 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1020 && peep2_reg_dead_p (2, operands[0])"
1021 [(set (match_dup 2) (match_dup 1))])
1023 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1024 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
1025 ;; floating point are handled by the more generic swap elimination pass.
1027 [(set (match_operand:TI 0 "vsx_register_operand")
1028 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1030 (set (match_operand:TI 2 "vsx_register_operand")
1031 (rotate:TI (match_dup 0)
1033 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1034 && (rtx_equal_p (operands[0], operands[2])
1035 || peep2_reg_dead_p (2, operands[0]))"
1036 [(set (match_dup 2) (match_dup 1))])
1038 ;; The post-reload split requires that we re-permute the source
1039 ;; register in case it is still live.
1041 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1042 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1043 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1046 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1047 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1048 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1052 ;; Vector constants that can be generated with XXSPLTIB, which was added in ISA
1053 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
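;; Illustrative example (assumed code generation, not part of the patterns
;; below): a byte splat such as
;;	vector signed char v = { -5, -5, -5, -5, -5, -5, -5, -5,
;;				 -5, -5, -5, -5, -5, -5, -5, -5 };
;; is expected to become a single
;;	xxspltib vs34,251	; 251 == (-5) & 0xff
;; rather than a load from the constant pool.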
1054 (define_insn "xxspltib_v16qi"
1055 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1056 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1059 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1060 return "xxspltib %x0,%2";
1062 [(set_attr "type" "vecperm")])
1064 (define_insn "xxspltib_<mode>_nosplit"
1065 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1066 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1069 rtx op1 = operands[1];
1073 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1077 operands[2] = GEN_INT (value & 0xff);
1078 return "xxspltib %x0,%2";
1080 [(set_attr "type" "vecperm")])
1082 (define_insn_and_split "*xxspltib_<mode>_split"
1083 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1084 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1092 rtx op0 = operands[0];
1093 rtx op1 = operands[1];
1094 rtx tmp = ((can_create_pseudo_p ())
1095 ? gen_reg_rtx (V16QImode)
1096 : gen_lowpart (V16QImode, op0));
1098 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1102 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1104 if (<MODE>mode == V2DImode)
1105 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1107 else if (<MODE>mode == V4SImode)
1108 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1110 else if (<MODE>mode == V8HImode)
1111 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1118 [(set_attr "type" "vecperm")
1119 (set_attr "length" "8")])
1122 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB
1123 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1124 ;; all 1's, since the machine does not have to wait for the previous
1125 ;; instruction using the register being set (such as a store waiting on a slow
1126 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
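;; For example (assumed output, illustration only): clearing or setting a
;; vector register would use
;;	xxspltib vs0,0		; all zero bits
;;	xxspltib vs0,255	; all one bits
;; instead of xxlxor vs0,vs0,vs0 or xxlorc vs0,vs0,vs0, which read the old
;; contents of vs0 and so must wait for the instruction that last set it.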
1128 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1129 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1130 ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1131 (define_insn "vsx_mov<mode>_64bit"
1132 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1133 "=ZwO, wa, wa, r, we, ?wQ,
1134 ?&r, ??r, ??Y, <??r>, wa, v,
1135 ?wa, v, <??r>, wZ, v")
1137 (match_operand:VSX_M 1 "input_operand"
1138 "wa, ZwO, wa, we, r, r,
1139 wQ, Y, r, r, wE, jwM,
1140 ?jwM, W, <nW>, v, wZ"))]
1142 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1143 && (register_operand (operands[0], <MODE>mode)
1144 || register_operand (operands[1], <MODE>mode))"
1146 return rs6000_output_move_128bit (operands);
1149 "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
1150 store, load, store, *, vecsimple, vecsimple,
1151 vecsimple, *, *, vecstore, vecload")
1157 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1159 <VSisa>, *, *, *, *")])
1161 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1162 ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
1163 ;; LVX (VMX) STVX (VMX)
1164 (define_insn "*vsx_mov<mode>_32bit"
1165 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1166 "=ZwO, wa, wa, ??r, ??Y, <??r>,
1167 wa, v, ?wa, v, <??r>,
1170 (match_operand:VSX_M 1 "input_operand"
1171 "wa, ZwO, wa, Y, r, r,
1172 wE, jwM, ?jwM, W, <nW>,
1175 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1176 && (register_operand (operands[0], <MODE>mode)
1177 || register_operand (operands[1], <MODE>mode))"
1179 return rs6000_output_move_128bit (operands);
1182 "vecstore, vecload, vecsimple, load, store, *,
1183 vecsimple, vecsimple, vecsimple, *, *,
1186 "*, *, *, 16, 16, 16,
1190 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1191 p9v, *, <VSisa>, *, *,
1194 ;; Explicit load/store expanders for the builtin functions
1195 (define_expand "vsx_load_<mode>"
1196 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1197 (match_operand:VSX_M 1 "memory_operand"))]
1198 "VECTOR_MEM_VSX_P (<MODE>mode)"
1200 /* Expand to swaps if needed, prior to swap optimization. */
1201 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1203 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1208 (define_expand "vsx_store_<mode>"
1209 [(set (match_operand:VSX_M 0 "memory_operand")
1210 (match_operand:VSX_M 1 "vsx_register_operand"))]
1211 "VECTOR_MEM_VSX_P (<MODE>mode)"
1213 /* Expand to swaps if needed, prior to swap optimization. */
1214 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1216 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1221 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1222 ;; when you really want their element-reversing behavior.
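;; For example (illustration only): on little endian the element-reversing
;; V2DI load is simply
;;	lxvd2x vs0,0,r3
;; with no permute, because lxvd2x already delivers the doublewords in
;; big-endian element order, which is exactly the reversed order wanted here.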
1223 (define_insn "vsx_ld_elemrev_v2di"
1224 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1226 (match_operand:V2DI 1 "memory_operand" "Z")
1227 (parallel [(const_int 1) (const_int 0)])))]
1228 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1230 [(set_attr "type" "vecload")])
1232 (define_insn "vsx_ld_elemrev_v1ti"
1233 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1235 (match_operand:V1TI 1 "memory_operand" "Z")
1236 (parallel [(const_int 0)])))]
1237 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1239 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1241 [(set_attr "type" "vecload")])
1243 (define_insn "vsx_ld_elemrev_v2df"
1244 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1246 (match_operand:V2DF 1 "memory_operand" "Z")
1247 (parallel [(const_int 1) (const_int 0)])))]
1248 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1250 [(set_attr "type" "vecload")])
1252 (define_insn "vsx_ld_elemrev_v4si"
1253 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1255 (match_operand:V4SI 1 "memory_operand" "Z")
1256 (parallel [(const_int 3) (const_int 2)
1257 (const_int 1) (const_int 0)])))]
1258 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1260 [(set_attr "type" "vecload")])
1262 (define_insn "vsx_ld_elemrev_v4sf"
1263 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1265 (match_operand:V4SF 1 "memory_operand" "Z")
1266 (parallel [(const_int 3) (const_int 2)
1267 (const_int 1) (const_int 0)])))]
1268 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1270 [(set_attr "type" "vecload")])
1272 (define_expand "vsx_ld_elemrev_v8hi"
1273 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1275 (match_operand:V8HI 1 "memory_operand" "Z")
1276 (parallel [(const_int 7) (const_int 6)
1277 (const_int 5) (const_int 4)
1278 (const_int 3) (const_int 2)
1279 (const_int 1) (const_int 0)])))]
1280 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1282 if (!TARGET_P9_VECTOR)
1284 rtx tmp = gen_reg_rtx (V4SImode);
1285 rtx subreg, subreg2, perm[16], pcv;
1286 /* 2 is leftmost element in register */
1287 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1290 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1291 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1292 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1294 for (i = 0; i < 16; ++i)
1295 perm[i] = GEN_INT (reorder[i]);
1297 pcv = force_reg (V16QImode,
1298 gen_rtx_CONST_VECTOR (V16QImode,
1299 gen_rtvec_v (16, perm)));
1300 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1306 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1307 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1309 (match_operand:V8HI 1 "memory_operand" "Z")
1310 (parallel [(const_int 7) (const_int 6)
1311 (const_int 5) (const_int 4)
1312 (const_int 3) (const_int 2)
1313 (const_int 1) (const_int 0)])))]
1314 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1316 [(set_attr "type" "vecload")])
1318 (define_expand "vsx_ld_elemrev_v16qi"
1319 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1321 (match_operand:V16QI 1 "memory_operand" "Z")
1322 (parallel [(const_int 15) (const_int 14)
1323 (const_int 13) (const_int 12)
1324 (const_int 11) (const_int 10)
1325 (const_int 9) (const_int 8)
1326 (const_int 7) (const_int 6)
1327 (const_int 5) (const_int 4)
1328 (const_int 3) (const_int 2)
1329 (const_int 1) (const_int 0)])))]
1330 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1332 if (!TARGET_P9_VECTOR)
1334 rtx tmp = gen_reg_rtx (V4SImode);
1335 rtx subreg, subreg2, perm[16], pcv;
1336 /* 3 is leftmost element in register */
1337 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1340 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1341 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1342 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1344 for (i = 0; i < 16; ++i)
1345 perm[i] = GEN_INT (reorder[i]);
1347 pcv = force_reg (V16QImode,
1348 gen_rtx_CONST_VECTOR (V16QImode,
1349 gen_rtvec_v (16, perm)));
1350 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1356 (define_insn "vsx_ld_elemrev_v16qi_internal"
1357 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1359 (match_operand:V16QI 1 "memory_operand" "Z")
1360 (parallel [(const_int 15) (const_int 14)
1361 (const_int 13) (const_int 12)
1362 (const_int 11) (const_int 10)
1363 (const_int 9) (const_int 8)
1364 (const_int 7) (const_int 6)
1365 (const_int 5) (const_int 4)
1366 (const_int 3) (const_int 2)
1367 (const_int 1) (const_int 0)])))]
1368 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1370 [(set_attr "type" "vecload")])
1372 (define_insn "vsx_st_elemrev_v1ti"
1373 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1375 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1376 (parallel [(const_int 0)])))
1377 (clobber (match_dup 1))]
1378 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1380 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1382 [(set_attr "type" "vecstore")])
1384 (define_insn "vsx_st_elemrev_v2df"
1385 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1387 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1388 (parallel [(const_int 1) (const_int 0)])))]
1389 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1391 [(set_attr "type" "vecstore")])
1393 (define_insn "vsx_st_elemrev_v2di"
1394 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1396 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1397 (parallel [(const_int 1) (const_int 0)])))]
1398 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1400 [(set_attr "type" "vecstore")])
1402 (define_insn "vsx_st_elemrev_v4sf"
1403 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1405 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1406 (parallel [(const_int 3) (const_int 2)
1407 (const_int 1) (const_int 0)])))]
1408 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1410 [(set_attr "type" "vecstore")])
1412 (define_insn "vsx_st_elemrev_v4si"
1413 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1415 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1416 (parallel [(const_int 3) (const_int 2)
1417 (const_int 1) (const_int 0)])))]
1418 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1420 [(set_attr "type" "vecstore")])
1422 (define_expand "vsx_st_elemrev_v8hi"
1423 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1425 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1426 (parallel [(const_int 7) (const_int 6)
1427 (const_int 5) (const_int 4)
1428 (const_int 3) (const_int 2)
1429 (const_int 1) (const_int 0)])))]
1430 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1432 if (!TARGET_P9_VECTOR)
1434 rtx mem_subreg, subreg, perm[16], pcv;
1435 rtx tmp = gen_reg_rtx (V8HImode);
1436 /* 2 is leftmost element in register */
1437 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1440 for (i = 0; i < 16; ++i)
1441 perm[i] = GEN_INT (reorder[i]);
1443 pcv = force_reg (V16QImode,
1444 gen_rtx_CONST_VECTOR (V16QImode,
1445 gen_rtvec_v (16, perm)));
1446 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1448 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1449 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1450 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1455 (define_insn "*vsx_st_elemrev_v2di_internal"
1456 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1458 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1459 (parallel [(const_int 1) (const_int 0)])))]
1460 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1462 [(set_attr "type" "vecstore")])
1464 (define_insn "*vsx_st_elemrev_v8hi_internal"
1465 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1467 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1468 (parallel [(const_int 7) (const_int 6)
1469 (const_int 5) (const_int 4)
1470 (const_int 3) (const_int 2)
1471 (const_int 1) (const_int 0)])))]
1472 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1474 [(set_attr "type" "vecstore")])
1476 (define_expand "vsx_st_elemrev_v16qi"
1477 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1479 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1480 (parallel [(const_int 15) (const_int 14)
1481 (const_int 13) (const_int 12)
1482 (const_int 11) (const_int 10)
1483 (const_int 9) (const_int 8)
1484 (const_int 7) (const_int 6)
1485 (const_int 5) (const_int 4)
1486 (const_int 3) (const_int 2)
1487 (const_int 1) (const_int 0)])))]
1488 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1490 if (!TARGET_P9_VECTOR)
1492 rtx mem_subreg, subreg, perm[16], pcv;
1493 rtx tmp = gen_reg_rtx (V16QImode);
1494 /* 3 is leftmost element in register */
1495 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1498 for (i = 0; i < 16; ++i)
1499 perm[i] = GEN_INT (reorder[i]);
1501 pcv = force_reg (V16QImode,
1502 gen_rtx_CONST_VECTOR (V16QImode,
1503 gen_rtvec_v (16, perm)));
1504 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1506 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1507 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1508 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1513 (define_insn "*vsx_st_elemrev_v16qi_internal"
1514 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1516 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1517 (parallel [(const_int 15) (const_int 14)
1518 (const_int 13) (const_int 12)
1519 (const_int 11) (const_int 10)
1520 (const_int 9) (const_int 8)
1521 (const_int 7) (const_int 6)
1522 (const_int 5) (const_int 4)
1523 (const_int 3) (const_int 2)
1524 (const_int 1) (const_int 0)])))]
1525 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1527 [(set_attr "type" "vecstore")])
1530 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1531 ;; instructions are now combined with the insn for the traditional floating
1533 (define_insn "*vsx_add<mode>3"
1534 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1535 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1536 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1537 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1538 "xvadd<sd>p %x0,%x1,%x2"
1539 [(set_attr "type" "<VStype_simple>")])
1541 (define_insn "*vsx_sub<mode>3"
1542 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1543 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1544 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1545 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1546 "xvsub<sd>p %x0,%x1,%x2"
1547 [(set_attr "type" "<VStype_simple>")])
1549 (define_insn "*vsx_mul<mode>3"
1550 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1551 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1552 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1553 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1554 "xvmul<sd>p %x0,%x1,%x2"
1555 [(set_attr "type" "<VStype_simple>")])
1557 ; Emulate vector with scalar for vec_mul in V2DImode
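;; The split below does the multiply one element at a time in scalar code,
;; roughly equivalent to the hypothetical C (illustration only)
;;	r[0] = a[0] * b[0];
;;	r[1] = a[1] * b[1];
;; using mulld on 64-bit targets (or an expanded DImode multiply on 32-bit),
;; then reassembling the V2DI result with vsx_concat_v2di.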
1558 (define_insn_and_split "vsx_mul_v2di"
1559 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1560 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1561 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1563 "VECTOR_MEM_VSX_P (V2DImode)"
1565 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1568 rtx op0 = operands[0];
1569 rtx op1 = operands[1];
1570 rtx op2 = operands[2];
1571 rtx op3 = gen_reg_rtx (DImode);
1572 rtx op4 = gen_reg_rtx (DImode);
1573 rtx op5 = gen_reg_rtx (DImode);
1574 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1575 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1576 if (TARGET_POWERPC64)
1577 emit_insn (gen_muldi3 (op5, op3, op4));
1580 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1581 emit_move_insn (op5, ret);
1583 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1584 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1585 if (TARGET_POWERPC64)
1586 emit_insn (gen_muldi3 (op3, op3, op4));
1589 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1590 emit_move_insn (op3, ret);
1592 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1595 [(set_attr "type" "mul")])
1597 (define_insn "*vsx_div<mode>3"
1598 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1599 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1600 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1601 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1602 "xvdiv<sd>p %x0,%x1,%x2"
1603 [(set_attr "type" "<VStype_div>")])
1605 ; Emulate vector with scalar for vec_div in V2DImode
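;; As for the multiply above, the divide is emulated one element at a time:
;;	r[0] = a[0] / b[0];
;;	r[1] = a[1] / b[1];
;; using divd on 64-bit targets; on 32-bit targets each element becomes a
;; call to the libgcc routine __divdi3 (__udivdi3 for the unsigned variant
;; below), obtained via optab_libfunc.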
1606 (define_insn_and_split "vsx_div_v2di"
1607 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1608 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1609 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1611 "VECTOR_MEM_VSX_P (V2DImode)"
1613 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1616 rtx op0 = operands[0];
1617 rtx op1 = operands[1];
1618 rtx op2 = operands[2];
1619 rtx op3 = gen_reg_rtx (DImode);
1620 rtx op4 = gen_reg_rtx (DImode);
1621 rtx op5 = gen_reg_rtx (DImode);
1622 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1623 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1624 if (TARGET_POWERPC64)
1625 emit_insn (gen_divdi3 (op5, op3, op4));
1628 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1629 rtx target = emit_library_call_value (libfunc,
1630 op5, LCT_NORMAL, DImode,
1633 emit_move_insn (op5, target);
1635 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1636 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1637 if (TARGET_POWERPC64)
1638 emit_insn (gen_divdi3 (op3, op3, op4));
1641 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1642 rtx target = emit_library_call_value (libfunc,
1643 op3, LCT_NORMAL, DImode,
1646 emit_move_insn (op3, target);
1648 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1651 [(set_attr "type" "div")])
1653 (define_insn_and_split "vsx_udiv_v2di"
1654 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1655 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1656 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1658 "VECTOR_MEM_VSX_P (V2DImode)"
1660 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1663 rtx op0 = operands[0];
1664 rtx op1 = operands[1];
1665 rtx op2 = operands[2];
1666 rtx op3 = gen_reg_rtx (DImode);
1667 rtx op4 = gen_reg_rtx (DImode);
1668 rtx op5 = gen_reg_rtx (DImode);
1669 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1670 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1671 if (TARGET_POWERPC64)
1672 emit_insn (gen_udivdi3 (op5, op3, op4));
1675 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1676 rtx target = emit_library_call_value (libfunc,
1677 op5, LCT_NORMAL, DImode,
1680 emit_move_insn (op5, target);
1682 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1683 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1684 if (TARGET_POWERPC64)
1685 emit_insn (gen_udivdi3 (op3, op3, op4));
1688 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1689 rtx target = emit_library_call_value (libfunc,
1690 op3, LCT_NORMAL, DImode,
1693 emit_move_insn (op3, target);
1695 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1698 [(set_attr "type" "div")])
1700 ;; *tdiv* instruction returning the FG flag
1701 (define_expand "vsx_tdiv<mode>3_fg"
1703 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1704 (match_operand:VSX_B 2 "vsx_register_operand")]
1706 (set (match_operand:SI 0 "gpc_reg_operand")
1707 (gt:SI (match_dup 3)
1709 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1711 operands[3] = gen_reg_rtx (CCFPmode);
1714 ;; *tdiv* instruction returning the FE flag
1715 (define_expand "vsx_tdiv<mode>3_fe"
1717 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1718 (match_operand:VSX_B 2 "vsx_register_operand")]
1720 (set (match_operand:SI 0 "gpc_reg_operand")
1721 (eq:SI (match_dup 3)
1723 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1725 operands[3] = gen_reg_rtx (CCFPmode);
1728 (define_insn "*vsx_tdiv<mode>3_internal"
1729 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1730 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1731 (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1733 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1734 "x<VSv>tdiv<sd>p %0,%x1,%x2"
1735 [(set_attr "type" "<VStype_simple>")])
1737 (define_insn "vsx_fre<mode>2"
1738 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1739 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1741 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1743 [(set_attr "type" "<VStype_simple>")])
1745 (define_insn "*vsx_neg<mode>2"
1746 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1747 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1748 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1749 "xvneg<sd>p %x0,%x1"
1750 [(set_attr "type" "<VStype_simple>")])
1752 (define_insn "*vsx_abs<mode>2"
1753 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1754 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1755 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1756 "xvabs<sd>p %x0,%x1"
1757 [(set_attr "type" "<VStype_simple>")])
1759 (define_insn "vsx_nabs<mode>2"
1760 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1763 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1764 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1765 "xvnabs<sd>p %x0,%x1"
1766 [(set_attr "type" "<VStype_simple>")])
1768 (define_insn "vsx_smax<mode>3"
1769 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1770 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1771 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1772 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1773 "xvmax<sd>p %x0,%x1,%x2"
1774 [(set_attr "type" "<VStype_simple>")])
1776 (define_insn "*vsx_smin<mode>3"
1777 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1778 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1779 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1780 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1781 "xvmin<sd>p %x0,%x1,%x2"
1782 [(set_attr "type" "<VStype_simple>")])
1784 (define_insn "*vsx_sqrt<mode>2"
1785 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1786 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1787 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1788 "xvsqrt<sd>p %x0,%x1"
1789 [(set_attr "type" "<sd>sqrt")])
1791 (define_insn "*vsx_rsqrte<mode>2"
1792 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1793 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1795 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1796 "xvrsqrte<sd>p %x0,%x1"
1797 [(set_attr "type" "<VStype_simple>")])
1799 ;; *tsqrt* returning the fg flag
1800 (define_expand "vsx_tsqrt<mode>2_fg"
1802 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1804 (set (match_operand:SI 0 "gpc_reg_operand")
1805 (gt:SI (match_dup 2)
1807 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1809 operands[2] = gen_reg_rtx (CCFPmode);
1812 ;; *tsqrt* returning the fe flag
1813 (define_expand "vsx_tsqrt<mode>2_fe"
1815 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1817 (set (match_operand:SI 0 "gpc_reg_operand")
1818 (eq:SI (match_dup 2)
1820 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1822 operands[2] = gen_reg_rtx (CCFPmode);
1825 (define_insn "*vsx_tsqrt<mode>2_internal"
1826 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1827 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1829 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1830 "x<VSv>tsqrt<sd>p %0,%x1"
1831 [(set_attr "type" "<VStype_simple>")])
1833 ;; Fused vector multiply/add instructions. Support the classical Altivec
1834 ;; versions of fma, which allow the target to be a separate register from the
1835 ;; 3 inputs. Under VSX, the target must be either the addend or the first multiplicand; see the example below.
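;; For example (illustration only), for x = fma (a, b, c) on V4SF:
;;	xvmaddasp	when x is allocated to the same register as the addend c
;;	xvmaddmsp	when x is allocated to the same register as a multiplicand
;;	vmaddfp		otherwise, when everything lives in Altivec registers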
1838 (define_insn "*vsx_fmav4sf4"
1839 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1841 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1842 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1843 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1844 "VECTOR_UNIT_VSX_P (V4SFmode)"
1846 xvmaddasp %x0,%x1,%x2
1847 xvmaddmsp %x0,%x1,%x3
1848 vmaddfp %0,%1,%2,%3"
1849 [(set_attr "type" "vecfloat")])
1851 (define_insn "*vsx_fmav2df4"
1852 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1854 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1855 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1856 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1857 "VECTOR_UNIT_VSX_P (V2DFmode)"
1859 xvmaddadp %x0,%x1,%x2
1860 xvmaddmdp %x0,%x1,%x3"
1861 [(set_attr "type" "vecdouble")])
1863 (define_insn "*vsx_fms<mode>4"
1864 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1866 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1867 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1869 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1870 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1872 xvmsuba<sd>p %x0,%x1,%x2
1873 xvmsubm<sd>p %x0,%x1,%x3"
1874 [(set_attr "type" "<VStype_mul>")])
1876 (define_insn "*vsx_nfma<mode>4"
1877 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1880 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1881 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1882 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1883 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1885 xvnmadda<sd>p %x0,%x1,%x2
1886 xvnmaddm<sd>p %x0,%x1,%x3"
1887 [(set_attr "type" "<VStype_mul>")])
1889 (define_insn "*vsx_nfmsv4sf4"
1890 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1893 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1894 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1896 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1897 "VECTOR_UNIT_VSX_P (V4SFmode)"
1899 xvnmsubasp %x0,%x1,%x2
1900 xvnmsubmsp %x0,%x1,%x3
1901 vnmsubfp %0,%1,%2,%3"
1902 [(set_attr "type" "vecfloat")])
1904 (define_insn "*vsx_nfmsv2df4"
1905 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1908 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1909 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1911 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1912 "VECTOR_UNIT_VSX_P (V2DFmode)"
1914 xvnmsubadp %x0,%x1,%x2
1915 xvnmsubmdp %x0,%x1,%x3"
1916 [(set_attr "type" "vecdouble")])
1918 ;; Vector conditional expressions (no scalar version for these instructions)
1919 (define_insn "vsx_eq<mode>"
1920 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1921 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1922 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1923 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1924 "xvcmpeq<sd>p %x0,%x1,%x2"
1925 [(set_attr "type" "<VStype_simple>")])
1927 (define_insn "vsx_gt<mode>"
1928 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1929 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1930 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1931 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1932 "xvcmpgt<sd>p %x0,%x1,%x2"
1933 [(set_attr "type" "<VStype_simple>")])
1935 (define_insn "*vsx_ge<mode>"
1936 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1937 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1938 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1939 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1940 "xvcmpge<sd>p %x0,%x1,%x2"
1941 [(set_attr "type" "<VStype_simple>")])
1943 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1944 ;; indicate a combined status
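;; For example (illustration only), a predicate such as vec_all_eq on V2DF is
;; expected to use the record form
;;	xvcmpeqdp. vs0,vs1,vs2
;; which writes the per-element mask to vs0 and also sets CR6, so a following
;; branch can test whether all (or none) of the elements compared equal.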
1945 (define_insn "*vsx_eq_<mode>_p"
1946 [(set (reg:CC CR6_REGNO)
1948 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1949 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1951 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1952 (eq:VSX_F (match_dup 1)
1954 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1955 "xvcmpeq<sd>p. %x0,%x1,%x2"
1956 [(set_attr "type" "<VStype_simple>")])
1958 (define_insn "*vsx_gt_<mode>_p"
1959 [(set (reg:CC CR6_REGNO)
1961 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1962 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1964 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1965 (gt:VSX_F (match_dup 1)
1967 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1968 "xvcmpgt<sd>p. %x0,%x1,%x2"
1969 [(set_attr "type" "<VStype_simple>")])
1971 (define_insn "*vsx_ge_<mode>_p"
1972 [(set (reg:CC CR6_REGNO)
1974 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1975 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1977 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1978 (ge:VSX_F (match_dup 1)
1980 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1981 "xvcmpge<sd>p. %x0,%x1,%x2"
1982 [(set_attr "type" "<VStype_simple>")])
1985 (define_insn "*vsx_xxsel<mode>"
1986 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1988 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1989 (match_operand:VSX_L 4 "zero_constant" ""))
1990 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
1991 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
1992 "VECTOR_MEM_VSX_P (<MODE>mode)"
1993 "xxsel %x0,%x3,%x2,%x1"
1994 [(set_attr "type" "vecmove")
1995 (set_attr "isa" "<VSisa>")])
1997 (define_insn "*vsx_xxsel<mode>_uns"
1998 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2000 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2001 (match_operand:VSX_L 4 "zero_constant" ""))
2002 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2003 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2004 "VECTOR_MEM_VSX_P (<MODE>mode)"
2005 "xxsel %x0,%x3,%x2,%x1"
2006 [(set_attr "type" "vecmove")
2007 (set_attr "isa" "<VSisa>")])
2010 (define_insn "vsx_copysign<mode>3"
2011 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2013 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2014 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2016 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2017 "xvcpsgn<sd>p %x0,%x2,%x1"
2018 [(set_attr "type" "<VStype_simple>")])
2020 ;; For the conversions, limit the register class for the integer value to be
2021 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2022 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2023 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2024 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2025 ;; in allowing virtual registers.
2026 (define_insn "vsx_float<VSi><mode>2"
2027 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2028 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2029 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2030 "xvcvsx<VSc><sd>p %x0,%x1"
2031 [(set_attr "type" "<VStype_simple>")])
2033 (define_insn "vsx_floatuns<VSi><mode>2"
2034 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2035 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2036 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2037 "xvcvux<VSc><sd>p %x0,%x1"
2038 [(set_attr "type" "<VStype_simple>")])
2040 (define_insn "vsx_fix_trunc<mode><VSi>2"
2041 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2042 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2043 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2044 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2045 [(set_attr "type" "<VStype_simple>")])
2047 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2048 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2049 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2050 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2051 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2052 [(set_attr "type" "<VStype_simple>")])
2054 ;; Math rounding functions
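;; As a reading aid (not taken from this file): the trailing letters on the
;; VSX round instructions are believed to select the rounding mode -- "i"
;; rounds to nearest with ties away from zero, "ic" uses the current rounding
;; mode, "iz" rounds toward zero, "im" toward -infinity and "ip" toward
;; +infinity, matching the btrunc, floor and ceil patterns below.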
2055 (define_insn "vsx_x<VSv>r<sd>pi"
2056 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2057 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2058 UNSPEC_VSX_ROUND_I))]
2059 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2060 "x<VSv>r<sd>pi %x0,%x1"
2061 [(set_attr "type" "<VStype_simple>")])
2063 (define_insn "vsx_x<VSv>r<sd>pic"
2064 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2065 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2066 UNSPEC_VSX_ROUND_IC))]
2067 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2068 "x<VSv>r<sd>pic %x0,%x1"
2069 [(set_attr "type" "<VStype_simple>")])
2071 (define_insn "vsx_btrunc<mode>2"
2072 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2073 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2074 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2075 "xvr<sd>piz %x0,%x1"
2076 [(set_attr "type" "<VStype_simple>")])
2078 (define_insn "*vsx_b2trunc<mode>2"
2079 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2080 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2082 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2083 "x<VSv>r<sd>piz %x0,%x1"
2084 [(set_attr "type" "<VStype_simple>")])
2086 (define_insn "vsx_floor<mode>2"
2087 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2088 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2090 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2091 "xvr<sd>pim %x0,%x1"
2092 [(set_attr "type" "<VStype_simple>")])
2094 (define_insn "vsx_ceil<mode>2"
2095 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2096 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2098 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2099 "xvr<sd>pip %x0,%x1"
2100 [(set_attr "type" "<VStype_simple>")])
2103 ;; VSX convert to/from double vector
2105 ;; Convert between single and double precision
2106 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2107 ;; scalar single precision instructions internally use the double format.
2108 ;; Prefer the altivec registers, since we likely will need to do a vperm
2109 (define_insn "vsx_xscvdpsp"
2110 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2111 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2112 UNSPEC_VSX_CVSPDP))]
2113 "VECTOR_UNIT_VSX_P (DFmode)"
2115 [(set_attr "type" "fp")])
2117 (define_insn "vsx_xvcvspdp"
2118 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2119 (unspec:V2DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
2120 UNSPEC_VSX_CVSPDP))]
2121 "VECTOR_UNIT_VSX_P (V4SFmode)"
2123 [(set_attr "type" "vecdouble")])
2125 (define_insn "vsx_xvcvdpsp"
2126 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2127 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2128 UNSPEC_VSX_CVSPDP))]
2129 "VECTOR_UNIT_VSX_P (V2DFmode)"
2131 [(set_attr "type" "vecdouble")])
2133 ;; xscvspdp, representing the scalar SF type as V4SF
2134 (define_insn "vsx_xscvspdp"
2135 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2136 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2137 UNSPEC_VSX_CVSPDP))]
2138 "VECTOR_UNIT_VSX_P (V4SFmode)"
2140 [(set_attr "type" "fp")])
2142 ;; Same as vsx_xscvspdp, but use SF as the type
2143 (define_insn "vsx_xscvspdp_scalar2"
2144 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2145 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2146 UNSPEC_VSX_CVSPDP))]
2147 "VECTOR_UNIT_VSX_P (V4SFmode)"
2149 [(set_attr "type" "fp")])
2151 ;; Generate xvcvhpsp instruction
2152 (define_insn "vsx_xvcvhpsp"
2153 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2154 (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2155 UNSPEC_VSX_CVHPSP))]
2158 [(set_attr "type" "vecfloat")])
2160 ;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2161 ;; format of scalars is actually DF.
2162 (define_insn "vsx_xscvdpsp_scalar"
2163 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2164 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2165 UNSPEC_VSX_CVSPDP))]
2166 "VECTOR_UNIT_VSX_P (V4SFmode)"
2168 [(set_attr "type" "fp")])
2170 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2171 (define_insn "vsx_xscvdpspn"
2172 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2173 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2174 UNSPEC_VSX_CVDPSPN))]
2177 [(set_attr "type" "fp")])
2179 (define_insn "vsx_xscvspdpn"
2180 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2181 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2182 UNSPEC_VSX_CVSPDPN))]
2185 [(set_attr "type" "fp")])
2187 (define_insn "vsx_xscvdpspn_scalar"
2188 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2189 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2190 UNSPEC_VSX_CVDPSPN))]
2193 [(set_attr "type" "fp")])
2195 ;; Used by direct move to move a SFmode value from GPR to VSX register
2196 (define_insn "vsx_xscvspdpn_directmove"
2197 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2198 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2199 UNSPEC_VSX_CVSPDPN))]
2202 [(set_attr "type" "fp")])
2204 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
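;; As a rough sketch of the intended semantics, vec_ctf (v, n) on a V2DI
;; input is expected to compute (double) v[i] * 2**-n; the expanders below
;; do the integer-to-float conversion first and then apply the power-of-two
;; scale via rs6000_scale_v2df.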
2206 (define_expand "vsx_xvcvsxddp_scale"
2207 [(match_operand:V2DF 0 "vsx_register_operand")
2208 (match_operand:V2DI 1 "vsx_register_operand")
2209 (match_operand:QI 2 "immediate_operand")]
2210 "VECTOR_UNIT_VSX_P (V2DFmode)"
2212 rtx op0 = operands[0];
2213 rtx op1 = operands[1];
2214 int scale = INTVAL (operands[2]);
2215 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2217 rs6000_scale_v2df (op0, op0, -scale);
2221 (define_insn "vsx_xvcvsxddp"
2222 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2223 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2224 UNSPEC_VSX_XVCVSXDDP))]
2225 "VECTOR_UNIT_VSX_P (V2DFmode)"
2227 [(set_attr "type" "vecdouble")])
2229 (define_expand "vsx_xvcvuxddp_scale"
2230 [(match_operand:V2DF 0 "vsx_register_operand")
2231 (match_operand:V2DI 1 "vsx_register_operand")
2232 (match_operand:QI 2 "immediate_operand")]
2233 "VECTOR_UNIT_VSX_P (V2DFmode)"
2235 rtx op0 = operands[0];
2236 rtx op1 = operands[1];
2237 int scale = INTVAL (operands[2]);
2238 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2240 rs6000_scale_v2df (op0, op0, -scale);
2244 (define_insn "vsx_xvcvuxddp"
2245 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2246 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2247 UNSPEC_VSX_XVCVUXDDP))]
2248 "VECTOR_UNIT_VSX_P (V2DFmode)"
2250 [(set_attr "type" "vecdouble")])
2252 (define_expand "vsx_xvcvdpsxds_scale"
2253 [(match_operand:V2DI 0 "vsx_register_operand")
2254 (match_operand:V2DF 1 "vsx_register_operand")
2255 (match_operand:QI 2 "immediate_operand")]
2256 "VECTOR_UNIT_VSX_P (V2DFmode)"
2258 rtx op0 = operands[0];
2259 rtx op1 = operands[1];
2261 int scale = INTVAL (operands[2]);
2266 tmp = gen_reg_rtx (V2DFmode);
2267 rs6000_scale_v2df (tmp, op1, scale);
2269 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2273 ;; convert vector of 64-bit floating point numbers to vector of
2274 ;; 64-bit signed integer
2275 (define_insn "vsx_xvcvdpsxds"
2276 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2277 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2278 UNSPEC_VSX_XVCVDPSXDS))]
2279 "VECTOR_UNIT_VSX_P (V2DFmode)"
2280 "xvcvdpsxds %x0,%x1"
2281 [(set_attr "type" "vecdouble")])
2283 ;; convert vector of 32-bit floating point numbers to vector of
2284 ;; 32-bit signed integer
2285 (define_insn "vsx_xvcvspsxws"
2286 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2287 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2288 UNSPEC_VSX_XVCVSPSXWS))]
2289 "VECTOR_UNIT_VSX_P (V4SFmode)"
2290 "xvcvspsxws %x0,%x1"
2291 [(set_attr "type" "vecfloat")])
2293 ;; convert vector of 64-bit floating point numbers to vector of
2294 ;; 64-bit unsigned integer
2295 (define_expand "vsx_xvcvdpuxds_scale"
2296 [(match_operand:V2DI 0 "vsx_register_operand")
2297 (match_operand:V2DF 1 "vsx_register_operand")
2298 (match_operand:QI 2 "immediate_operand")]
2299 "VECTOR_UNIT_VSX_P (V2DFmode)"
2301 rtx op0 = operands[0];
2302 rtx op1 = operands[1];
2304 int scale = INTVAL (operands[2]);
2309 tmp = gen_reg_rtx (V2DFmode);
2310 rs6000_scale_v2df (tmp, op1, scale);
2312 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2316 ;; convert vector of 32-bit floating point numbers to vector of
2317 ;; 32-bit unsigned integer
2318 (define_insn "vsx_xvcvspuxws"
2319 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2320 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2321 UNSPEC_VSX_XVCVSPSXWS))]
2322 "VECTOR_UNIT_VSX_P (V4SFmode)"
2323 "xvcvspuxws %x0,%x1"
2324 [(set_attr "type" "vecfloat")])
2326 (define_insn "vsx_xvcvdpuxds"
2327 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2328 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2329 UNSPEC_VSX_XVCVDPUXDS))]
2330 "VECTOR_UNIT_VSX_P (V2DFmode)"
2331 "xvcvdpuxds %x0,%x1"
2332 [(set_attr "type" "vecdouble")])
2334 ;; Convert from 64-bit to 32-bit types
2335 ;; Note, favor the Altivec registers since the usual use of these instructions
2336 ;; is in vector converts and we need to use the Altivec vperm instruction.
2338 (define_insn "vsx_xvcvdpsxws"
2339 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2340 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2341 UNSPEC_VSX_CVDPSXWS))]
2342 "VECTOR_UNIT_VSX_P (V2DFmode)"
2343 "xvcvdpsxws %x0,%x1"
2344 [(set_attr "type" "vecdouble")])
2346 (define_insn "vsx_xvcvdpuxws"
2347 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2348 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2349 UNSPEC_VSX_CVDPUXWS))]
2350 "VECTOR_UNIT_VSX_P (V2DFmode)"
2351 "xvcvdpuxws %x0,%x1"
2352 [(set_attr "type" "vecdouble")])
2354 (define_insn "vsx_xvcvsxdsp"
2355 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2356 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2357 UNSPEC_VSX_CVSXDSP))]
2358 "VECTOR_UNIT_VSX_P (V2DFmode)"
2360 [(set_attr "type" "vecfloat")])
2362 (define_insn "vsx_xvcvuxdsp"
2363 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2364 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2365 UNSPEC_VSX_CVUXDSP))]
2366 "VECTOR_UNIT_VSX_P (V2DFmode)"
2368 [(set_attr "type" "vecdouble")])
2370 (define_insn "vsx_xvcdpsp"
2371 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2372 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2373 UNSPEC_VSX_XVCDPSP))]
2374 "VECTOR_UNIT_VSX_P (V2DFmode)"
2376 [(set_attr "type" "vecdouble")])
2378 ;; Convert from 32-bit to 64-bit types
2379 ;; Provide both vector and scalar targets
2380 (define_insn "vsx_xvcvsxwdp"
2381 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2382 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2383 UNSPEC_VSX_CVSXWDP))]
2384 "VECTOR_UNIT_VSX_P (V2DFmode)"
2386 [(set_attr "type" "vecdouble")])
2388 (define_insn "vsx_xvcvsxwdp_df"
2389 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2390 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2391 UNSPEC_VSX_CVSXWDP))]
2394 [(set_attr "type" "vecdouble")])
2396 (define_insn "vsx_xvcvuxwdp"
2397 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2398 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2399 UNSPEC_VSX_CVUXWDP))]
2400 "VECTOR_UNIT_VSX_P (V2DFmode)"
2402 [(set_attr "type" "vecdouble")])
2404 (define_insn "vsx_xvcvuxwdp_df"
2405 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2406 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2407 UNSPEC_VSX_CVUXWDP))]
2410 [(set_attr "type" "vecdouble")])
2412 (define_insn "vsx_xvcvspsxds"
2413 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2414 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
2415 UNSPEC_VSX_CVSPSXDS))]
2416 "VECTOR_UNIT_VSX_P (V2DFmode)"
2417 "xvcvspsxds %x0,%x1"
2418 [(set_attr "type" "vecdouble")])
2420 (define_insn "vsx_xvcvspuxds"
2421 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2422 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
2423 UNSPEC_VSX_CVSPUXDS))]
2424 "VECTOR_UNIT_VSX_P (V2DFmode)"
2425 "xvcvspuxds %x0,%x1"
2426 [(set_attr "type" "vecdouble")])
2428 (define_insn "vsx_xvcvsxwsp"
2429 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2430 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2431 UNSPEC_VSX_CVSXWSP))]
2432 "VECTOR_UNIT_VSX_P (V4SFmode)"
2434 [(set_attr "type" "vecfloat")])
2436 (define_insn "vsx_xvcvuxwsp"
2437 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2438 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2439 UNSPEC_VSX_CVUXWSP))]
2440 "VECTOR_UNIT_VSX_P (V4SFmode)"
2442 [(set_attr "type" "vecfloat")])
2444 ;; Generate float2_v2df
2445 ;; convert two vectors of double to a vector of float
2446 (define_expand "float2_v2df"
2447 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2448 (use (match_operand:V2DF 1 "register_operand" "wa"))
2449 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2450 "VECTOR_UNIT_VSX_P (V4SFmode)"
2452 rtx rtx_src1, rtx_src2, rtx_dst;
2454 rtx_dst = operands[0];
2455 rtx_src1 = operands[1];
2456 rtx_src2 = operands[2];
2458 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2463 ;; convert two long long signed ints to float
2464 (define_expand "float2_v2di"
2465 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2466 (use (match_operand:V2DI 1 "register_operand" "wa"))
2467 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2468 "VECTOR_UNIT_VSX_P (V4SFmode)"
2470 rtx rtx_src1, rtx_src2, rtx_dst;
2472 rtx_dst = operands[0];
2473 rtx_src1 = operands[1];
2474 rtx_src2 = operands[2];
2476 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2480 ;; Generate uns_float2
2481 ;; convert two long long unsigned ints to float
2482 (define_expand "uns_float2_v2di"
2483 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2484 (use (match_operand:V2DI 1 "register_operand" "wa"))
2485 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2486 "VECTOR_UNIT_VSX_P (V4SFmode)"
2488 rtx rtx_src1, rtx_src2, rtx_dst;
2490 rtx_dst = operands[0];
2491 rtx_src1 = operands[1];
2492 rtx_src2 = operands[2];
2494 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2499 ;; convert double or long long signed to float
2500 ;; (Only even words are valid, BE numbering)
2501 (define_expand "floate<mode>"
2502 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2503 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2504 "VECTOR_UNIT_VSX_P (V4SFmode)"
2506 if (BYTES_BIG_ENDIAN)
2508 /* Shift left one word to put the even word in the correct location.  */
2510 rtx rtx_val = GEN_INT (4);
2512 rtx_tmp = gen_reg_rtx (V4SFmode);
2513 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2514 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2515 rtx_tmp, rtx_tmp, rtx_val));
2518 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2523 ;; Generate uns_floate
2524 ;; convert long long unsigned to float
2525 ;; (Only even words are valid, BE numbering)
2526 (define_expand "unsfloatev2di"
2527 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2528 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2529 "VECTOR_UNIT_VSX_P (V4SFmode)"
2531 if (BYTES_BIG_ENDIAN)
2533 /* Shift left one word to put the even word in the correct location.  */
2535 rtx rtx_val = GEN_INT (4);
2537 rtx_tmp = gen_reg_rtx (V4SFmode);
2538 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2539 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2540 rtx_tmp, rtx_tmp, rtx_val));
2543 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2549 ;; convert double or long long signed to float
2550 ;; (Only odd words are valid, BE numbering)
2551 (define_expand "floato<mode>"
2552 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2553 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2554 "VECTOR_UNIT_VSX_P (V4SFmode)"
2556 if (BYTES_BIG_ENDIAN)
2557 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2560 /* Shift left one word to put the odd word in the correct location.  */
2562 rtx rtx_val = GEN_INT (4);
2564 rtx_tmp = gen_reg_rtx (V4SFmode);
2565 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2566 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2567 rtx_tmp, rtx_tmp, rtx_val));
2572 ;; Generate uns_floato
2573 ;; convert long long unsigned to float
2574 ;; (Only odd words are valid, BE numbering)
2575 (define_expand "unsfloatov2di"
2576 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2577 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2578 "VECTOR_UNIT_VSX_P (V4SFmode)"
2580 if (BYTES_BIG_ENDIAN)
2581 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2584 /* Shift left one word to put the odd word in the correct location.  */
2586 rtx rtx_val = GEN_INT (4);
2588 rtx_tmp = gen_reg_rtx (V4SFmode);
2589 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2590 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2591 rtx_tmp, rtx_tmp, rtx_val));
2596 ;; Generate vsigned2
2597 ;; convert two double float vectors to a vector of single precision signed ints
2598 (define_expand "vsigned2_v2df"
2599 [(match_operand:V4SI 0 "register_operand" "=wa")
2600 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2601 (match_operand:V2DF 2 "register_operand" "wa")]
2602 UNSPEC_VSX_VSIGNED2)]
2605 rtx rtx_src1, rtx_src2, rtx_dst;
2606 bool signed_convert = true;
2608 rtx_dst = operands[0];
2609 rtx_src1 = operands[1];
2610 rtx_src2 = operands[2];
2612 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2616 ;; Generate vsignedo_v2df
2617 ;; signed double float to int conversion, odd words
2618 (define_expand "vsignedo_v2df"
2619 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2620 (match_operand:V2DF 1 "register_operand" "wa"))]
2623 if (BYTES_BIG_ENDIAN)
2626 rtx rtx_val = GEN_INT (12);
2627 rtx_tmp = gen_reg_rtx (V4SImode);
2629 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2631 /* Big endian word numbering for words in operand is 0 1 2 3.
2632 Take (operand[1] operand[1]) and shift left one word:
2633 0 1 2 3 0 1 2 3 => 1 2 3 0.
2634 Words 1 and 3 are now where they need to be for the result.  */
2636 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2640 /* Little endian word numbering for operand is 3 2 1 0.
2641 Result words 3 and 1 are where they need to be. */
2642 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2646 [(set_attr "type" "veccomplex")])
2648 ;; Generate vsignede_v2df
2649 ;; signed double float to int conversion, even words
2650 (define_expand "vsignede_v2df"
2651 [(set (match_operand:V4SI 0 "register_operand" "=v")
2652 (match_operand:V2DF 1 "register_operand" "v"))]
2655 if (BYTES_BIG_ENDIAN)
2656 /* Big endian word numbering for words in operand is 0 1 2 3.
2657 Words 0 and 2 are already where they need to be for the result.  */
2658 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2663 rtx rtx_val = GEN_INT (12);
2664 rtx_tmp = gen_reg_rtx (V4SImode);
2666 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2668 /* Little endian word numbering for operand is 3 2 1 0.
2669 take (operand[1] operand[1]) and shift left three words
2670 0 1 2 3 0 1 2 3 => 3 0 1 2
2671 Words 0 and 2 are now where they need to be for the result. */
2672 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2677 [(set_attr "type" "veccomplex")])
2679 ;; Generate vunsigned2
2680 ;; convert two double float vectors to a vector of single precision unsigned ints
2682 (define_expand "vunsigned2_v2df"
2683 [(match_operand:V4SI 0 "register_operand" "=v")
2684 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2685 (match_operand:V2DF 2 "register_operand" "v")]
2686 UNSPEC_VSX_VSIGNED2)]
2689 rtx rtx_src1, rtx_src2, rtx_dst;
2690 bool signed_convert = false;
2692 rtx_dst = operands[0];
2693 rtx_src1 = operands[1];
2694 rtx_src2 = operands[2];
2696 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2700 ;; Generate vunsignedo_v2df
2701 ;; unsigned double float to int conversion, odd words
2702 (define_expand "vunsignedo_v2df"
2703 [(set (match_operand:V4SI 0 "register_operand" "=v")
2704 (match_operand:V2DF 1 "register_operand" "v"))]
2707 if (BYTES_BIG_ENDIAN)
2710 rtx rtx_val = GEN_INT (12);
2711 rtx_tmp = gen_reg_rtx (V4SImode);
2713 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2715 /* Big endian word numbering for words in operand is 0 1 2 3.
2716 Take (operand[1] operand[1]) and shift left one word:
2717 0 1 2 3 0 1 2 3 => 1 2 3 0.
2718 Words 1 and 3 are now where they need to be for the result.  */
2720 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2724 /* Little endian word numbering for operand is 3 2 1 0.
2725 Result words 3 and 1 are where they need to be. */
2726 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2730 [(set_attr "type" "veccomplex")])
2732 ;; Generate vunsignede_v2df
2733 ;; unsigned double float to int conversion, even words
2734 (define_expand "vunsignede_v2df"
2735 [(set (match_operand:V4SI 0 "register_operand" "=v")
2736 (match_operand:V2DF 1 "register_operand" "v"))]
2739 if (BYTES_BIG_ENDIAN)
2740 /* Big endian word numbering for words in operand is 0 1 2 3.
2741 Words 0 and 2 are already where they need to be for the result.  */
2742 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2747 rtx rtx_val = GEN_INT (12);
2748 rtx_tmp = gen_reg_rtx (V4SImode);
2750 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2752 /* Little endian word numbering for operand is 3 2 1 0.
2753 take (operand[1] operand[1]) and shift left three words
2754 0 1 2 3 0 1 2 3 => 3 0 1 2
2755 Words 0 and 2 are now where they need to be for the result. */
2756 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2761 [(set_attr "type" "veccomplex")])
2763 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2764 ;; the xvrdpiz instruction does not truncate the value if the floating
2765 ;; point value is < LONG_MIN or > LONG_MAX.
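;; For example, with x = 1.0e30 xvrdpiz still yields 1.0e30, whereas the
;; explicit fix/float sequence would have gone through an out-of-range
;; 64-bit integer, so the fold is only valid when fast-math lets us assume
;; such values do not occur.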
2766 (define_insn "*vsx_float_fix_v2df2"
2767 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2770 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2772 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2773 && !flag_trapping_math && TARGET_FRIZ"
2775 [(set_attr "type" "vecdouble")])
2778 ;; Permute operations
2780 ;; Build a V2DF/V2DI vector from two scalars
2781 (define_insn "vsx_concat_<mode>"
2782 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2784 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2785 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2786 "VECTOR_MEM_VSX_P (<MODE>mode)"
2788 if (which_alternative == 0)
2789 return (BYTES_BIG_ENDIAN
2790 ? "xxpermdi %x0,%x1,%x2,0"
2791 : "xxpermdi %x0,%x2,%x1,0");
2793 else if (which_alternative == 1)
2794 return (BYTES_BIG_ENDIAN
2795 ? "mtvsrdd %x0,%1,%2"
2796 : "mtvsrdd %x0,%2,%1");
2801 [(set_attr "type" "vecperm")])
2803 ;; Combiner patterns to allow creating XXPERMDIs to access either doubleword
2804 ;; element in a vector register.
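;; As a reading aid: the 2-bit xxpermdi immediate is believed to select the
;; doublewords, the high bit choosing the doubleword of the first input and
;; the low bit that of the second, which is why the patterns below build the
;; immediate as 2*<first index> + <second index> (mirrored for little endian).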
2805 (define_insn "*vsx_concat_<mode>_1"
2806 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2808 (vec_select:<VS_scalar>
2809 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2810 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2811 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2812 "VECTOR_MEM_VSX_P (<MODE>mode)"
2814 HOST_WIDE_INT dword = INTVAL (operands[2]);
2815 if (BYTES_BIG_ENDIAN)
2817 operands[4] = GEN_INT (2*dword);
2818 return "xxpermdi %x0,%x1,%x3,%4";
2822 operands[4] = GEN_INT (!dword);
2823 return "xxpermdi %x0,%x3,%x1,%4";
2826 [(set_attr "type" "vecperm")])
2828 (define_insn "*vsx_concat_<mode>_2"
2829 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2831 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2832 (vec_select:<VS_scalar>
2833 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2834 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2835 "VECTOR_MEM_VSX_P (<MODE>mode)"
2837 HOST_WIDE_INT dword = INTVAL (operands[3]);
2838 if (BYTES_BIG_ENDIAN)
2840 operands[4] = GEN_INT (dword);
2841 return "xxpermdi %x0,%x1,%x2,%4";
2845 operands[4] = GEN_INT (2 * !dword);
2846 return "xxpermdi %x0,%x2,%x1,%4";
2849 [(set_attr "type" "vecperm")])
2851 (define_insn "*vsx_concat_<mode>_3"
2852 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2854 (vec_select:<VS_scalar>
2855 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2856 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2857 (vec_select:<VS_scalar>
2858 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2859 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2860 "VECTOR_MEM_VSX_P (<MODE>mode)"
2862 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2863 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2864 if (BYTES_BIG_ENDIAN)
2866 operands[5] = GEN_INT ((2 * dword1) + dword2);
2867 return "xxpermdi %x0,%x1,%x3,%5";
2871 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2872 return "xxpermdi %x0,%x3,%x1,%5";
2875 [(set_attr "type" "vecperm")])
2877 ;; Special purpose concat using xxpermdi to glue two single precision values
2878 ;; together, relying on the fact that internally scalar floats are represented
2879 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
2880 (define_insn "vsx_concat_v2sf"
2881 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2883 [(match_operand:SF 1 "vsx_register_operand" "wa")
2884 (match_operand:SF 2 "vsx_register_operand" "wa")]
2885 UNSPEC_VSX_CONCAT))]
2886 "VECTOR_MEM_VSX_P (V2DFmode)"
2888 if (BYTES_BIG_ENDIAN)
2889 return "xxpermdi %x0,%x1,%x2,0";
2891 return "xxpermdi %x0,%x2,%x1,0";
2893 [(set_attr "type" "vecperm")])
2895 ;; Concatenate 4 SImode elements into a V4SImode reg.
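;; The expander below builds the vector from GPRs: each SImode input is
;; zero-extended to DImode, pairs are merged into 64-bit halves with a shift
;; and an ior, and the two halves are then glued together with
;; vsx_concat_v2di.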
2896 (define_expand "vsx_init_v4si"
2897 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2898 (use (match_operand:SI 1 "gpc_reg_operand"))
2899 (use (match_operand:SI 2 "gpc_reg_operand"))
2900 (use (match_operand:SI 3 "gpc_reg_operand"))
2901 (use (match_operand:SI 4 "gpc_reg_operand"))]
2902 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2904 rtx a = gen_reg_rtx (DImode);
2905 rtx b = gen_reg_rtx (DImode);
2906 rtx c = gen_reg_rtx (DImode);
2907 rtx d = gen_reg_rtx (DImode);
2908 emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2909 emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2910 emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2911 emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2912 if (!BYTES_BIG_ENDIAN)
2918 rtx aa = gen_reg_rtx (DImode);
2919 rtx ab = gen_reg_rtx (DImode);
2920 rtx cc = gen_reg_rtx (DImode);
2921 rtx cd = gen_reg_rtx (DImode);
2922 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2923 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2924 emit_insn (gen_iordi3 (ab, aa, b));
2925 emit_insn (gen_iordi3 (cd, cc, d));
2927 rtx abcd = gen_reg_rtx (V2DImode);
2928 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2929 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2933 ;; xxpermdi for little endian loads and stores. We need several of
2934 ;; these since the form of the PARALLEL differs by mode.
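;; In every case the fix-up is the same doubleword swap,
;; "xxpermdi %x0,%x1,%x1,2" (the register permuted with itself); only the
;; PARALLEL spelling of the lane order differs between the element modes.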
2935 (define_insn "*vsx_xxpermdi2_le_<mode>"
2936 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2938 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
2939 (parallel [(const_int 1) (const_int 0)])))]
2940 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2941 "xxpermdi %x0,%x1,%x1,2"
2942 [(set_attr "type" "vecperm")])
2944 (define_insn "*vsx_xxpermdi4_le_<mode>"
2945 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
2947 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
2948 (parallel [(const_int 2) (const_int 3)
2949 (const_int 0) (const_int 1)])))]
2950 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2951 "xxpermdi %x0,%x1,%x1,2"
2952 [(set_attr "type" "vecperm")])
2954 (define_insn "*vsx_xxpermdi8_le_V8HI"
2955 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2957 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2958 (parallel [(const_int 4) (const_int 5)
2959 (const_int 6) (const_int 7)
2960 (const_int 0) (const_int 1)
2961 (const_int 2) (const_int 3)])))]
2962 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2963 "xxpermdi %x0,%x1,%x1,2"
2964 [(set_attr "type" "vecperm")])
2966 (define_insn "*vsx_xxpermdi16_le_V16QI"
2967 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2969 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2970 (parallel [(const_int 8) (const_int 9)
2971 (const_int 10) (const_int 11)
2972 (const_int 12) (const_int 13)
2973 (const_int 14) (const_int 15)
2974 (const_int 0) (const_int 1)
2975 (const_int 2) (const_int 3)
2976 (const_int 4) (const_int 5)
2977 (const_int 6) (const_int 7)])))]
2978 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
2979 "xxpermdi %x0,%x1,%x1,2"
2980 [(set_attr "type" "vecperm")])
2982 ;; lxvd2x for little endian loads. We need several of
2983 ;; these since the form of the PARALLEL differs by mode.
2984 (define_insn "*vsx_lxvd2x2_le_<mode>"
2985 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2987 (match_operand:VSX_D 1 "memory_operand" "Z")
2988 (parallel [(const_int 1) (const_int 0)])))]
2989 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2991 [(set_attr "type" "vecload")])
2993 (define_insn "*vsx_lxvd2x4_le_<mode>"
2994 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
2996 (match_operand:VSX_W 1 "memory_operand" "Z")
2997 (parallel [(const_int 2) (const_int 3)
2998 (const_int 0) (const_int 1)])))]
2999 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3001 [(set_attr "type" "vecload")])
3003 (define_insn "*vsx_lxvd2x8_le_V8HI"
3004 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3006 (match_operand:V8HI 1 "memory_operand" "Z")
3007 (parallel [(const_int 4) (const_int 5)
3008 (const_int 6) (const_int 7)
3009 (const_int 0) (const_int 1)
3010 (const_int 2) (const_int 3)])))]
3011 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3013 [(set_attr "type" "vecload")])
3015 (define_insn "*vsx_lxvd2x16_le_V16QI"
3016 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3018 (match_operand:V16QI 1 "memory_operand" "Z")
3019 (parallel [(const_int 8) (const_int 9)
3020 (const_int 10) (const_int 11)
3021 (const_int 12) (const_int 13)
3022 (const_int 14) (const_int 15)
3023 (const_int 0) (const_int 1)
3024 (const_int 2) (const_int 3)
3025 (const_int 4) (const_int 5)
3026 (const_int 6) (const_int 7)])))]
3027 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3029 [(set_attr "type" "vecload")])
3031 ;; stxvd2x for little endian stores. We need several of
3032 ;; these since the form of the PARALLEL differs by mode.
3033 (define_insn "*vsx_stxvd2x2_le_<mode>"
3034 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3036 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3037 (parallel [(const_int 1) (const_int 0)])))]
3038 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3040 [(set_attr "type" "vecstore")])
3042 (define_insn "*vsx_stxvd2x4_le_<mode>"
3043 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3045 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3046 (parallel [(const_int 2) (const_int 3)
3047 (const_int 0) (const_int 1)])))]
3048 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3050 [(set_attr "type" "vecstore")])
3052 (define_insn "*vsx_stxvd2x8_le_V8HI"
3053 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3055 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3056 (parallel [(const_int 4) (const_int 5)
3057 (const_int 6) (const_int 7)
3058 (const_int 0) (const_int 1)
3059 (const_int 2) (const_int 3)])))]
3060 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3062 [(set_attr "type" "vecstore")])
3064 (define_insn "*vsx_stxvd2x16_le_V16QI"
3065 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3067 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3068 (parallel [(const_int 8) (const_int 9)
3069 (const_int 10) (const_int 11)
3070 (const_int 12) (const_int 13)
3071 (const_int 14) (const_int 15)
3072 (const_int 0) (const_int 1)
3073 (const_int 2) (const_int 3)
3074 (const_int 4) (const_int 5)
3075 (const_int 6) (const_int 7)])))]
3076 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3078 [(set_attr "type" "vecstore")])
3080 ;; Convert a TImode value into V1TImode
3081 (define_expand "vsx_set_v1ti"
3082 [(match_operand:V1TI 0 "nonimmediate_operand")
3083 (match_operand:V1TI 1 "nonimmediate_operand")
3084 (match_operand:TI 2 "input_operand")
3085 (match_operand:QI 3 "u5bit_cint_operand")]
3086 "VECTOR_MEM_VSX_P (V1TImode)"
3088 if (operands[3] != const0_rtx)
3091 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3095 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
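;; For example, setting element 0 becomes: extract element 1 of the old
;; vector, then vec_concat the new scalar with that value, so the update is
;; expressed with the existing vsx_extract_<mode> and vsx_concat_<mode>
;; patterns rather than a separate insert pattern.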
3096 (define_expand "vsx_set_<mode>"
3097 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3098 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3099 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3100 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3101 "VECTOR_MEM_VSX_P (<MODE>mode)"
3103 rtx dest = operands[0];
3104 rtx vec_reg = operands[1];
3105 rtx value = operands[2];
3106 rtx ele = operands[3];
3107 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3109 if (ele == const0_rtx)
3111 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3112 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3115 else if (ele == const1_rtx)
3117 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3118 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3125 ;; Extract a DF/DI element from V2DF/V2DI
3126 ;; Optimize cases where we can do a simple or direct move.
3127 ;; Or see if we can avoid doing the move at all.
3129 ;; There are some unresolved problems with reload that show up if an Altivec
3130 ;; register was picked. Limit the scalar value to FPRs for now.
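;; As a sketch of the cases handled below: when the selected doubleword is
;; already in the scalar position and source and destination are the same
;; register, no code is emitted (just an assembler comment); otherwise a
;; register copy or a direct move to a GPR (mfvsrd/mfvsrld) is used, and the
;; remaining case rotates the wanted doubleword into place with xxpermdi.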
3132 (define_insn "vsx_extract_<mode>"
3133 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3134 (vec_select:<VS_scalar>
3135 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3137 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3138 "VECTOR_MEM_VSX_P (<MODE>mode)"
3140 int element = INTVAL (operands[2]);
3141 int op0_regno = REGNO (operands[0]);
3142 int op1_regno = REGNO (operands[1]);
3145 gcc_assert (IN_RANGE (element, 0, 1));
3146 gcc_assert (VSX_REGNO_P (op1_regno));
3148 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3150 if (op0_regno == op1_regno)
3151 return ASM_COMMENT_START " vec_extract to same register";
3153 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3154 && TARGET_POWERPC64)
3155 return "mfvsrd %0,%x1";
3157 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3160 else if (VSX_REGNO_P (op0_regno))
3161 return "xxlor %x0,%x1,%x1";
3167 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3168 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3169 return "mfvsrld %0,%x1";
3171 else if (VSX_REGNO_P (op0_regno))
3173 fldDM = element << 1;
3174 if (!BYTES_BIG_ENDIAN)
3176 operands[3] = GEN_INT (fldDM);
3177 return "xxpermdi %x0,%x1,%x1,%3";
3183 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
3184 (set_attr "isa" "*,*,p8v,p9v")])
3186 ;; Optimize extracting a single scalar element from memory.
3187 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3188 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3189 (vec_select:<VSX_D:VS_scalar>
3190 (match_operand:VSX_D 1 "memory_operand" "m,m")
3191 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3192 (clobber (match_scratch:P 3 "=&b,&b"))]
3193 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3195 "&& reload_completed"
3196 [(set (match_dup 0) (match_dup 4))]
3198 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3199 operands[3], <VSX_D:VS_scalar>mode);
3201 [(set_attr "type" "fpload,load")
3202 (set_attr "length" "8")])
3204 ;; Optimize storing a single scalar element that is already in the right position for a store to memory.
3206 (define_insn "*vsx_extract_<mode>_store"
3207 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3208 (vec_select:<VS_scalar>
3209 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3210 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3211 "VECTOR_MEM_VSX_P (<MODE>mode)"
3216 [(set_attr "type" "fpstore")
3217 (set_attr "isa" "*,p7v,p9v")])
3219 ;; Variable V2DI/V2DF extract shift
3220 (define_insn "vsx_vslo_<mode>"
3221 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3222 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3223 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3225 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3227 [(set_attr "type" "vecperm")])
3229 ;; Variable V2DI/V2DF extract
3230 (define_insn_and_split "vsx_extract_<mode>_var"
3231 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,wa,r")
3232 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3233 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3234 UNSPEC_VSX_EXTRACT))
3235 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3236 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3237 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3239 "&& reload_completed"
3242 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3243 operands[3], operands[4]);
3247 ;; Extract a SF element from V4SF
3248 (define_insn_and_split "vsx_extract_v4sf"
3249 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3251 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3252 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3253 (clobber (match_scratch:V4SF 3 "=0"))]
3254 "VECTOR_UNIT_VSX_P (V4SFmode)"
3259 rtx op0 = operands[0];
3260 rtx op1 = operands[1];
3261 rtx op2 = operands[2];
3262 rtx op3 = operands[3];
3264 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3270 if (GET_CODE (op3) == SCRATCH)
3271 op3 = gen_reg_rtx (V4SFmode);
3272 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3275 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3278 [(set_attr "length" "8")
3279 (set_attr "type" "fp")])
3281 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3282 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3284 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3285 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3286 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3287 "VECTOR_MEM_VSX_P (V4SFmode)"
3289 "&& reload_completed"
3290 [(set (match_dup 0) (match_dup 4))]
3292 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3293 operands[3], SFmode);
3295 [(set_attr "type" "fpload,fpload,fpload,load")
3296 (set_attr "length" "8")
3297 (set_attr "isa" "*,p7v,p9v,*")])
3299 ;; Variable V4SF extract
3300 (define_insn_and_split "vsx_extract_v4sf_var"
3301 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,wa,?r")
3302 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3303 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3304 UNSPEC_VSX_EXTRACT))
3305 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3306 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3307 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3309 "&& reload_completed"
3312 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3313 operands[3], operands[4]);
3317 ;; Expand the builtin form of xxpermdi to canonical rtl.
3318 (define_expand "vsx_xxpermdi_<mode>"
3319 [(match_operand:VSX_L 0 "vsx_register_operand")
3320 (match_operand:VSX_L 1 "vsx_register_operand")
3321 (match_operand:VSX_L 2 "vsx_register_operand")
3322 (match_operand:QI 3 "u5bit_cint_operand")]
3323 "VECTOR_MEM_VSX_P (<MODE>mode)"
3325 rtx target = operands[0];
3326 rtx op0 = operands[1];
3327 rtx op1 = operands[2];
3328 int mask = INTVAL (operands[3]);
3329 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3330 rtx perm1 = GEN_INT ((mask & 1) + 2);
3331 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3333 if (<MODE>mode == V2DFmode)
3334 gen = gen_vsx_xxpermdi2_v2df_1;
3337 gen = gen_vsx_xxpermdi2_v2di_1;
3338 if (<MODE>mode != V2DImode)
3340 target = gen_lowpart (V2DImode, target);
3341 op0 = gen_lowpart (V2DImode, op0);
3342 op1 = gen_lowpart (V2DImode, op1);
3345 emit_insn (gen (target, op0, op1, perm0, perm1));
3349 ;; Special version of xxpermdi that retains big-endian semantics.
3350 (define_expand "vsx_xxpermdi_<mode>_be"
3351 [(match_operand:VSX_L 0 "vsx_register_operand")
3352 (match_operand:VSX_L 1 "vsx_register_operand")
3353 (match_operand:VSX_L 2 "vsx_register_operand")
3354 (match_operand:QI 3 "u5bit_cint_operand")]
3355 "VECTOR_MEM_VSX_P (<MODE>mode)"
3357 rtx target = operands[0];
3358 rtx op0 = operands[1];
3359 rtx op1 = operands[2];
3360 int mask = INTVAL (operands[3]);
3361 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3362 rtx perm1 = GEN_INT ((mask & 1) + 2);
3363 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3365 if (<MODE>mode == V2DFmode)
3366 gen = gen_vsx_xxpermdi2_v2df_1;
3369 gen = gen_vsx_xxpermdi2_v2di_1;
3370 if (<MODE>mode != V2DImode)
3372 target = gen_lowpart (V2DImode, target);
3373 op0 = gen_lowpart (V2DImode, op0);
3374 op1 = gen_lowpart (V2DImode, op1);
3377 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3378 transformation we don't want; it is necessary for
3379 rs6000_expand_vec_perm_const_1 but not for this use. So we
3380 prepare for that by reversing the transformation here. */
3381 if (BYTES_BIG_ENDIAN)
3382 emit_insn (gen (target, op0, op1, perm0, perm1));
3385 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3386 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3387 emit_insn (gen (target, op1, op0, p0, p1));
3392 (define_insn "vsx_xxpermdi2_<mode>_1"
3393 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3395 (vec_concat:<VS_double>
3396 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3397 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3398 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3399 (match_operand 4 "const_2_to_3_operand" "")])))]
3400 "VECTOR_MEM_VSX_P (<MODE>mode)"
3404 /* For little endian, swap operands and invert/swap selectors
3405 to get the correct xxpermdi. The operand swap sets up the
3406 inputs as a little endian array. The selectors are swapped
3407 because they are defined to use big endian ordering. The
3408 selectors are inverted to get the correct doublewords for
3409 little endian ordering. */
3410 if (BYTES_BIG_ENDIAN)
3412 op3 = INTVAL (operands[3]);
3413 op4 = INTVAL (operands[4]);
3417 op3 = 3 - INTVAL (operands[4]);
3418 op4 = 3 - INTVAL (operands[3]);
3421 mask = (op3 << 1) | (op4 - 2);
3422 operands[3] = GEN_INT (mask);
3424 if (BYTES_BIG_ENDIAN)
3425 return "xxpermdi %x0,%x1,%x2,%3";
3427 return "xxpermdi %x0,%x2,%x1,%3";
3429 [(set_attr "type" "vecperm")])
3431 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3432 ;; none of the small types were allowed in a vector register, so we had to
3433 ;; extract to a DImode and either do a direct move or store.
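;; On ISA 3.0 the element can be extracted directly: xxextractuw and
;; vextractu{b,h} take a byte offset, so the patterns below turn the element
;; number into <element> * <element size>, adjusting the element number for
;; endianness first.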
3434 (define_expand "vsx_extract_<mode>"
3435 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3436 (vec_select:<VS_scalar>
3437 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3438 (parallel [(match_operand:QI 2 "const_int_operand")])))
3439 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3440 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3442 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
3443 if (TARGET_P9_VECTOR)
3445 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3451 (define_insn "vsx_extract_<mode>_p9"
3452 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3453 (vec_select:<VS_scalar>
3454 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3455 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3456 (clobber (match_scratch:SI 3 "=r,X"))]
3457 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3459 if (which_alternative == 0)
3464 HOST_WIDE_INT elt = INTVAL (operands[2]);
3465 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3466 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3469 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3470 HOST_WIDE_INT offset = unit_size * elt_adj;
3472 operands[2] = GEN_INT (offset);
3474 return "xxextractuw %x0,%x1,%2";
3476 return "vextractu<wd> %0,%1,%2";
3479 [(set_attr "type" "vecsimple")
3480 (set_attr "isa" "p9v,*")])
3483 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3484 (vec_select:<VS_scalar>
3485 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3486 (parallel [(match_operand:QI 2 "const_int_operand")])))
3487 (clobber (match_operand:SI 3 "int_reg_operand"))]
3488 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3491 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3492 rtx op1 = operands[1];
3493 rtx op2 = operands[2];
3494 rtx op3 = operands[3];
3495 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3497 emit_move_insn (op3, GEN_INT (offset));
3498 if (BYTES_BIG_ENDIAN)
3499 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3501 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3505 ;; Optimize zero extracts to eliminate the AND after the extract.
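;; This relies on the unsigned extract instructions already zero-extending
;; the element into the destination, so the explicit zero_extend in the RTL
;; does not need a separate AND instruction.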
3506 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3507 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3509 (vec_select:<VS_scalar>
3510 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3511 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3512 (clobber (match_scratch:SI 3 "=r,X"))]
3513 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3515 "&& reload_completed"
3516 [(parallel [(set (match_dup 4)
3517 (vec_select:<VS_scalar>
3519 (parallel [(match_dup 2)])))
3520 (clobber (match_dup 3))])]
3522 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3524 [(set_attr "isa" "p9v,*")])
3526 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3527 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3528 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3529 (vec_select:<VS_scalar>
3530 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3531 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3532 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3533 (clobber (match_scratch:SI 4 "=X,&r"))]
3534 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3536 "&& reload_completed"
3537 [(parallel [(set (match_dup 3)
3538 (vec_select:<VS_scalar>
3540 (parallel [(match_dup 2)])))
3541 (clobber (match_dup 4))])
3545 (define_insn_and_split "*vsx_extract_si"
3546 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3548 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3549 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3550 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3551 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3553 "&& reload_completed"
3556 rtx dest = operands[0];
3557 rtx src = operands[1];
3558 rtx element = operands[2];
3559 rtx vec_tmp = operands[3];
3562 if (!BYTES_BIG_ENDIAN)
3563 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3565 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3567 value = INTVAL (element);
3569 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3573 if (MEM_P (operands[0]))
3575 if (can_create_pseudo_p ())
3576 dest = rs6000_force_indexed_or_indirect_mem (dest);
3578 if (TARGET_P8_VECTOR)
3579 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3581 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3584 else if (TARGET_P8_VECTOR)
3585 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3587 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3588 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3592 [(set_attr "type" "mftgpr,vecperm,fpstore")
3593 (set_attr "length" "8")
3594 (set_attr "isa" "*,p8v,*")])
3596 (define_insn_and_split "*vsx_extract_<mode>_p8"
3597 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3598 (vec_select:<VS_scalar>
3599 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3600 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3601 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3602 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3603 && !TARGET_P9_VECTOR"
3605 "&& reload_completed"
3608 rtx dest = operands[0];
3609 rtx src = operands[1];
3610 rtx element = operands[2];
3611 rtx vec_tmp = operands[3];
3614 if (!BYTES_BIG_ENDIAN)
3615 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3617 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3619 value = INTVAL (element);
3620 if (<MODE>mode == V16QImode)
3623 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3627 else if (<MODE>mode == V8HImode)
3630 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3637 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3638 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3641 [(set_attr "type" "mftgpr")])
3643 ;; Optimize extracting a single scalar element from memory.
3644 (define_insn_and_split "*vsx_extract_<mode>_load"
3645 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3646 (vec_select:<VS_scalar>
3647 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3648 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3649 (clobber (match_scratch:DI 3 "=&b"))]
3650 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3652 "&& reload_completed"
3653 [(set (match_dup 0) (match_dup 4))]
3655 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3656 operands[3], <VS_scalar>mode);
3658 [(set_attr "type" "load")
3659 (set_attr "length" "8")])
3661 ;; Variable V16QI/V8HI/V4SI extract
3662 (define_insn_and_split "vsx_extract_<mode>_var"
3663 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3665 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m")
3666 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3667 UNSPEC_VSX_EXTRACT))
3668 (clobber (match_scratch:DI 3 "=r,r,&b"))
3669 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3670 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3672 "&& reload_completed"
3675 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3676 operands[3], operands[4]);
3679 [(set_attr "isa" "p9v,*,*")])
3681 (define_insn_and_split "*vsx_extract_<mode>_<VS_scalar>mode_var"
3682 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3683 (zero_extend:<VS_scalar>
3684 (unspec:<VSX_EXTRACT_I:VS_scalar>
3685 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m")
3686 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3687 UNSPEC_VSX_EXTRACT)))
3688 (clobber (match_scratch:DI 3 "=r,r,&b"))
3689 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3690 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3692 "&& reload_completed"
3695 machine_mode smode = <VS_scalar>mode;
3696 rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3697 operands[1], operands[2],
3698 operands[3], operands[4]);
3701 [(set_attr "isa" "p9v,*,*")])
3703 ;; VSX_EXTRACT optimizations
3704 ;; Optimize double d = (double) vec_extract (vi, <n>)
3705 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP.
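;; A minimal illustration of the source being matched (assuming VSX and
;; 64-bit direct moves are enabled):
;;   vector int vi;
;;   double d = (double) vec_extract (vi, 2);
;; which splats the selected element into the right word position (when it
;; is not already there) and then converts it with xvcvsxwdp or xvcvuxwdp.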
3706 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3707 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
3710 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3711 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3712 (clobber (match_scratch:V4SI 3 "=v"))]
3713 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3718 rtx dest = operands[0];
3719 rtx src = operands[1];
3720 rtx element = operands[2];
3721 rtx v4si_tmp = operands[3];
3724 if (!BYTES_BIG_ENDIAN)
3725 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3727 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3729 value = INTVAL (element);
3732 if (GET_CODE (v4si_tmp) == SCRATCH)
3733 v4si_tmp = gen_reg_rtx (V4SImode);
3734 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3739 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3743 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3744 ;; where <type> is a floating point type supported by the hardware that is
3745 ;; not double.  First convert the value to double, and then to the desired type.
3747 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3748 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
3749 (any_float:VSX_EXTRACT_FL
3751 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3752 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3753 (clobber (match_scratch:V4SI 3 "=v"))
3754 (clobber (match_scratch:DF 4 "=wa"))]
3755 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3760 rtx dest = operands[0];
3761 rtx src = operands[1];
3762 rtx element = operands[2];
3763 rtx v4si_tmp = operands[3];
3764 rtx df_tmp = operands[4];
3767 if (!BYTES_BIG_ENDIAN)
3768 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3770 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3772 value = INTVAL (element);
3775 if (GET_CODE (v4si_tmp) == SCRATCH)
3776 v4si_tmp = gen_reg_rtx (V4SImode);
3777 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3782 if (GET_CODE (df_tmp) == SCRATCH)
3783 df_tmp = gen_reg_rtx (DFmode);
3785 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3787 if (<MODE>mode == SFmode)
3788 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3789 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3790 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3791 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3792 && TARGET_FLOAT128_HW)
3793 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3794 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3795 emit_insn (gen_extenddfif2 (dest, df_tmp));
3796 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3797 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3804 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
3805 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3806 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3807 ;; vector short or vector unsigned short.
3808 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3809 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3811 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3812 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3813 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3814 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3815 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3816 && TARGET_P9_VECTOR"
3818 "&& reload_completed"
3819 [(parallel [(set (match_dup 3)
3820 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3822 (parallel [(match_dup 2)])))
3823 (clobber (scratch:SI))])
3825 (sign_extend:DI (match_dup 3)))
3827 (float:<FL_CONV:MODE> (match_dup 4)))]
3829 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3831 [(set_attr "isa" "<VSisa>")])
3833 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3834 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3835 (unsigned_float:FL_CONV
3836 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3837 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3838 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3839 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3840 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3841 && TARGET_P9_VECTOR"
3843 "&& reload_completed"
3844 [(parallel [(set (match_dup 3)
3845 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3847 (parallel [(match_dup 2)])))
3848 (clobber (scratch:SI))])
3850 (float:<FL_CONV:MODE> (match_dup 4)))]
3852 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3854 [(set_attr "isa" "<VSisa>")])
3856 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3857 (define_insn "vsx_set_<mode>_p9"
3858 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3859 (unspec:VSX_EXTRACT_I
3860 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3861 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3862 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3864 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3866 int ele = INTVAL (operands[3]);
3867 int nunits = GET_MODE_NUNITS (<MODE>mode);
3869 if (!BYTES_BIG_ENDIAN)
3870 ele = nunits - 1 - ele;
3872 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3873 if (<MODE>mode == V4SImode)
3874 return "xxinsertw %x0,%x2,%3";
3876 return "vinsert<wd> %0,%2,%3";
3878 [(set_attr "type" "vecperm")])
3880 (define_insn_and_split "vsx_set_v4sf_p9"
3881 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3883 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3884 (match_operand:SF 2 "gpc_reg_operand" "wa")
3885 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3887 (clobber (match_scratch:SI 4 "=&wa"))]
3888 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3890 "&& reload_completed"
3892 (unspec:V4SF [(match_dup 2)]
3893 UNSPEC_VSX_CVDPSPN))
3894 (parallel [(set (match_dup 4)
3895 (vec_select:SI (match_dup 6)
3896 (parallel [(match_dup 7)])))
3897 (clobber (scratch:SI))])
3899 (unspec:V4SI [(match_dup 8)
3904 unsigned int tmp_regno = reg_or_subregno (operands[4]);
3906 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3907 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3908 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
3909 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3911 [(set_attr "type" "vecperm")
3912 (set_attr "length" "12")
3913 (set_attr "isa" "p9v")])
3915 ;; Special case of setting a V4SF element to 0.0f
3916 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3917 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3919 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3920 (match_operand:SF 2 "zero_fp_constant" "j")
3921 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3923 (clobber (match_scratch:SI 4 "=&wa"))]
3924 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3926 "&& reload_completed"
3930 (unspec:V4SI [(match_dup 5)
3935 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3937 [(set_attr "type" "vecperm")
3938 (set_attr "length" "8")
3939 (set_attr "isa" "p9v")])
3941 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
3942 ;; that is in the default scalar position (1 for big endian, 2 for little
3943 ;; endian). We just need to do an xxinsertw since the element is in the
3944 ;; correct location.
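;; A minimal sketch of the source form being matched (hypothetical example,
;; assuming <altivec.h>); with n in the default scalar position only a single
;; xxinsertw should be needed:
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	copy_elem (vector float v1, vector float v2)
;;	{
;;	  /* On little endian, element 2 of v2 is already in the scalar slot.  */
;;	  return vec_insert (vec_extract (v2, 2), v1, 0);
;;	}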
3946 (define_insn "*vsx_insert_extract_v4sf_p9"
3947 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3949 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3950 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3952 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3953 (match_operand:QI 4 "const_0_to_3_operand" "n")]
3955 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
3956 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
3958 int ele = INTVAL (operands[4]);
3960 if (!BYTES_BIG_ENDIAN)
3961 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
3963 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
3964 return "xxinsertw %x0,%x2,%4";
3966 [(set_attr "type" "vecperm")])
3968 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
3969 ;; that is in the default scalar position (1 for big endian, 2 for little
3970 ;; endian).  Do the insert/extract as integer (V4SI) operations to avoid the float conversions.
3972 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
3973 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3975 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3976 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3978 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3979 (match_operand:QI 4 "const_0_to_3_operand" "n")]
3981 (clobber (match_scratch:SI 5 "=&wa"))]
3982 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
3983 && TARGET_P9_VECTOR && TARGET_POWERPC64
3984 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
3987 [(parallel [(set (match_dup 5)
3988 (vec_select:SI (match_dup 6)
3989 (parallel [(match_dup 3)])))
3990 (clobber (scratch:SI))])
3992 (unspec:V4SI [(match_dup 8)
3997 if (GET_CODE (operands[5]) == SCRATCH)
3998 operands[5] = gen_reg_rtx (SImode);
4000 operands[6] = gen_lowpart (V4SImode, operands[2]);
4001 operands[7] = gen_lowpart (V4SImode, operands[0]);
4002 operands[8] = gen_lowpart (V4SImode, operands[1]);
4004 [(set_attr "type" "vecperm")
4005 (set_attr "isa" "p9v")])
4007 ;; Expanders for builtins
4008 (define_expand "vsx_mergel_<mode>"
4009 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4010 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4011 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4012 "VECTOR_MEM_VSX_P (<MODE>mode)"
4014 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4015 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4016 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4017 emit_insn (gen_rtx_SET (operands[0], x));
4021 (define_expand "vsx_mergeh_<mode>"
4022 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4023 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4024 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4025 "VECTOR_MEM_VSX_P (<MODE>mode)"
4027 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4028 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4029 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4030 emit_insn (gen_rtx_SET (operands[0], x));
4035 ;; We separate the register splat insn from the memory splat insn to force the
4036 ;; register allocator to generate the indexed form of the SPLAT when it is
4037 ;; given an offsettable memory reference. Otherwise, if the register and
4038 ;; memory insns were combined into a single insn, the register allocator will
4039 ;; load the value into a register, and then do a double word permute.
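;; For orientation, a typical splat that this expander handles (a sketch under
;; the assumption that <altivec.h> is available, not code from this file):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	splat_scalar (double x)
;;	{
;;	  return vec_splats (x);
;;	}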
4040 (define_expand "vsx_splat_<mode>"
4041 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4042 (vec_duplicate:VSX_D
4043 (match_operand:<VS_scalar> 1 "input_operand")))]
4044 "VECTOR_MEM_VSX_P (<MODE>mode)"
4046 rtx op1 = operands[1];
4048 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4049 else if (!REG_P (op1))
4050 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4053 (define_insn "vsx_splat_<mode>_reg"
4054 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4055 (vec_duplicate:VSX_D
4056 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4057 "VECTOR_MEM_VSX_P (<MODE>mode)"
4059 xxpermdi %x0,%x1,%x1,0
4061 [(set_attr "type" "vecperm")])
4063 (define_insn "vsx_splat_<mode>_mem"
4064 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4065 (vec_duplicate:VSX_D
4066 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4067 "VECTOR_MEM_VSX_P (<MODE>mode)"
4069 [(set_attr "type" "vecload")])
4071 ;; V4SI splat support
4072 (define_insn "vsx_splat_v4si"
4073 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4075 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4080 [(set_attr "type" "vecperm,vecload")])
4082 ;; SImode is not currently allowed in vector registers. This pattern
4083 ;; allows us to use direct move to get the value in a vector register
4084 ;; so that we can use XXSPLTW
4085 (define_insn "vsx_splat_v4si_di"
4086 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4089 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4090 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4094 [(set_attr "type" "vecperm")
4095 (set_attr "isa" "p8v,*")])
4097 ;; V4SF splat (ISA 3.0)
4098 (define_insn_and_split "vsx_splat_v4sf"
4099 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4101 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4107 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4109 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4111 (unspec:V4SF [(match_dup 0)
4112 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4114 [(set_attr "type" "vecload,vecperm,mftgpr")
4115 (set_attr "length" "*,8,*")
4116 (set_attr "isa" "*,p8v,*")])
4118 ;; V4SF/V4SI splat from a vector element
4119 (define_insn "vsx_xxspltw_<mode>"
4120 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4121 (vec_duplicate:VSX_W
4122 (vec_select:<VS_scalar>
4123 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4125 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4126 "VECTOR_MEM_VSX_P (<MODE>mode)"
4128 if (!BYTES_BIG_ENDIAN)
4129 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4131 return "xxspltw %x0,%x1,%2";
4133 [(set_attr "type" "vecperm")])
4135 (define_insn "vsx_xxspltw_<mode>_direct"
4136 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4137 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4138 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4139 UNSPEC_VSX_XXSPLTW))]
4140 "VECTOR_MEM_VSX_P (<MODE>mode)"
4141 "xxspltw %x0,%x1,%2"
4142 [(set_attr "type" "vecperm")])
4144 ;; V16QI/V8HI splat support on ISA 2.07
4145 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4146 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4147 (vec_duplicate:VSX_SPLAT_I
4148 (truncate:<VS_scalar>
4149 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4150 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4151 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4152 [(set_attr "type" "vecperm")])
4154 ;; V2DF/V2DI splat for use by vec_splat builtin
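;; A hypothetical source-level use of the vec_splat builtin mentioned above
;; (assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	splat_elem1 (vector double v)
;;	{
;;	  return vec_splat (v, 1);
;;	}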
4155 (define_insn "vsx_xxspltd_<mode>"
4156 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4157 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4158 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4159 UNSPEC_VSX_XXSPLTD))]
4160 "VECTOR_MEM_VSX_P (<MODE>mode)"
4162 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4163 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4164 return "xxpermdi %x0,%x1,%x1,0";
4166 return "xxpermdi %x0,%x1,%x1,3";
4168 [(set_attr "type" "vecperm")])
4170 ;; V4SF/V4SI interleave
4171 (define_insn "vsx_xxmrghw_<mode>"
4172 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4174 (vec_concat:<VS_double>
4175 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4176 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4177 (parallel [(const_int 0) (const_int 4)
4178 (const_int 1) (const_int 5)])))]
4179 "VECTOR_MEM_VSX_P (<MODE>mode)"
4181 if (BYTES_BIG_ENDIAN)
4182 return "xxmrghw %x0,%x1,%x2";
4184 return "xxmrglw %x0,%x2,%x1";
4186 [(set_attr "type" "vecperm")])
4188 (define_insn "vsx_xxmrglw_<mode>"
4189 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4191 (vec_concat:<VS_double>
4192 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4193 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4194 (parallel [(const_int 2) (const_int 6)
4195 (const_int 3) (const_int 7)])))]
4196 "VECTOR_MEM_VSX_P (<MODE>mode)"
4198 if (BYTES_BIG_ENDIAN)
4199 return "xxmrglw %x0,%x1,%x2";
4201 return "xxmrghw %x0,%x2,%x1";
4203 [(set_attr "type" "vecperm")])
4205 ;; Shift left double by word immediate
4206 (define_insn "vsx_xxsldwi_<mode>"
4207 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4208 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4209 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4210 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4212 "VECTOR_MEM_VSX_P (<MODE>mode)"
4213 "xxsldwi %x0,%x1,%x2,%3"
4214 [(set_attr "type" "vecperm")
4215 (set_attr "isa" "<VSisa>")])
4218 ;; Vector reduction insns and splitters
4220 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4221 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4225 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4226 (parallel [(const_int 1)]))
4229 (parallel [(const_int 0)])))
4231 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4232 "VECTOR_UNIT_VSX_P (V2DFmode)"
4237 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4238 ? gen_reg_rtx (V2DFmode)
4240 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4241 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4244 [(set_attr "length" "8")
4245 (set_attr "type" "veccomplex")])
4247 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4248 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4250 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4251 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4252 (clobber (match_scratch:V4SF 2 "=&wa"))
4253 (clobber (match_scratch:V4SF 3 "=&wa"))]
4254 "VECTOR_UNIT_VSX_P (V4SFmode)"
4259 rtx op0 = operands[0];
4260 rtx op1 = operands[1];
4261 rtx tmp2, tmp3, tmp4;
4263 if (can_create_pseudo_p ())
4265 tmp2 = gen_reg_rtx (V4SFmode);
4266 tmp3 = gen_reg_rtx (V4SFmode);
4267 tmp4 = gen_reg_rtx (V4SFmode);
4276 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4277 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4278 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4279 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4282 [(set_attr "length" "16")
4283 (set_attr "type" "veccomplex")])
4285 ;; Combiner patterns with the vector reduction patterns that know we can get
4286 ;; to the top element of the V2DF array without doing an extract.
4288 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4289 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4294 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4295 (parallel [(const_int 1)]))
4298 (parallel [(const_int 0)])))
4300 (parallel [(const_int 1)])))
4301 (clobber (match_scratch:DF 2 "=0,&wa"))]
4302 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4307 rtx hi = gen_highpart (DFmode, operands[1]);
4308 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4309 ? gen_reg_rtx (DFmode)
4312 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4313 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4316 [(set_attr "length" "8")
4317 (set_attr "type" "veccomplex")])
4319 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4320 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4323 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4324 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4325 (parallel [(const_int 3)])))
4326 (clobber (match_scratch:V4SF 2 "=&wa"))
4327 (clobber (match_scratch:V4SF 3 "=&wa"))
4328 (clobber (match_scratch:V4SF 4 "=0"))]
4329 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4334 rtx op0 = operands[0];
4335 rtx op1 = operands[1];
4336 rtx tmp2, tmp3, tmp4, tmp5;
4338 if (can_create_pseudo_p ())
4340 tmp2 = gen_reg_rtx (V4SFmode);
4341 tmp3 = gen_reg_rtx (V4SFmode);
4342 tmp4 = gen_reg_rtx (V4SFmode);
4343 tmp5 = gen_reg_rtx (V4SFmode);
4353 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4354 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4355 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4356 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4357 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4360 [(set_attr "length" "20")
4361 (set_attr "type" "veccomplex")])
4364 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4366 [(set (match_operand:P 0 "base_reg_operand")
4367 (match_operand:P 1 "short_cint_operand"))
4368 (set (match_operand:VSX_M 2 "vsx_register_operand")
4369 (mem:VSX_M (plus:P (match_dup 0)
4370 (match_operand:P 3 "int_reg_operand"))))]
4371 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4372 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4373 [(set_attr "length" "8")
4374 (set_attr "type" "vecload")])
4377 [(set (match_operand:P 0 "base_reg_operand")
4378 (match_operand:P 1 "short_cint_operand"))
4379 (set (match_operand:VSX_M 2 "vsx_register_operand")
4380 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4382 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4383 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4384 [(set_attr "length" "8")
4385 (set_attr "type" "vecload")])
4388 ;; ISA 3.0 vector extend sign support
4390 (define_insn "vsx_sign_extend_qi_<mode>"
4391 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4393 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4394 UNSPEC_VSX_SIGN_EXTEND))]
4397 [(set_attr "type" "vecexts")])
4399 (define_insn "vsx_sign_extend_hi_<mode>"
4400 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4402 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4403 UNSPEC_VSX_SIGN_EXTEND))]
4406 [(set_attr "type" "vecexts")])
4408 (define_insn "*vsx_sign_extend_si_v2di"
4409 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4410 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4411 UNSPEC_VSX_SIGN_EXTEND))]
4414 [(set_attr "type" "vecexts")])
4417 ;; ISA 3.0 Binary Floating-Point Support
4419 ;; VSX Scalar Extract Exponent Quad-Precision
4420 (define_insn "xsxexpqp_<mode>"
4421 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4422 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4423 UNSPEC_VSX_SXEXPDP))]
4426 [(set_attr "type" "vecmove")])
4428 ;; VSX Scalar Extract Exponent Double-Precision
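;; A hedged sketch of how this is typically reached from source; the overloaded
;; built-in name scalar_extract_exp and the <altivec.h> requirement are
;; assumptions, not stated in this file:
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	exponent_bits (double x)
;;	{
;;	  return scalar_extract_exp (x);
;;	}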
4429 (define_insn "xsxexpdp"
4430 [(set (match_operand:DI 0 "register_operand" "=r")
4431 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4432 UNSPEC_VSX_SXEXPDP))]
4433 "TARGET_P9_VECTOR && TARGET_64BIT"
4435 [(set_attr "type" "integer")])
4437 ;; VSX Scalar Extract Significand Quad-Precision
4438 (define_insn "xsxsigqp_<mode>"
4439 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4440 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4444 [(set_attr "type" "vecmove")])
4446 ;; VSX Scalar Extract Significand Double-Precision
4447 (define_insn "xsxsigdp"
4448 [(set (match_operand:DI 0 "register_operand" "=r")
4449 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4451 "TARGET_P9_VECTOR && TARGET_64BIT"
4453 [(set_attr "type" "integer")])
4455 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4456 (define_insn "xsiexpqpf_<mode>"
4457 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4459 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4460 (match_operand:DI 2 "altivec_register_operand" "v")]
4461 UNSPEC_VSX_SIEXPQP))]
4464 [(set_attr "type" "vecmove")])
4466 ;; VSX Scalar Insert Exponent Quad-Precision
4467 (define_insn "xsiexpqp_<mode>"
4468 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4469 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4470 (match_operand:DI 2 "altivec_register_operand" "v")]
4471 UNSPEC_VSX_SIEXPQP))]
4474 [(set_attr "type" "vecmove")])
4476 ;; VSX Scalar Insert Exponent Double-Precision
4477 (define_insn "xsiexpdp"
4478 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4479 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4480 (match_operand:DI 2 "register_operand" "r")]
4481 UNSPEC_VSX_SIEXPDP))]
4482 "TARGET_P9_VECTOR && TARGET_64BIT"
4483 "xsiexpdp %x0,%1,%2"
4484 [(set_attr "type" "fpsimple")])
4486 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4487 (define_insn "xsiexpdpf"
4488 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4489 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4490 (match_operand:DI 2 "register_operand" "r")]
4491 UNSPEC_VSX_SIEXPDP))]
4492 "TARGET_P9_VECTOR && TARGET_64BIT"
4493 "xsiexpdp %x0,%1,%2"
4494 [(set_attr "type" "fpsimple")])
4496 ;; VSX Scalar Compare Exponents Double-Precision
4497 (define_expand "xscmpexpdp_<code>"
4501 [(match_operand:DF 1 "vsx_register_operand" "wa")
4502 (match_operand:DF 2 "vsx_register_operand" "wa")]
4503 UNSPEC_VSX_SCMPEXPDP)
4505 (set (match_operand:SI 0 "register_operand" "=r")
4506 (CMP_TEST:SI (match_dup 3)
4510 operands[3] = gen_reg_rtx (CCFPmode);
4513 (define_insn "*xscmpexpdp"
4514 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4516 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4517 (match_operand:DF 2 "vsx_register_operand" "wa")]
4518 UNSPEC_VSX_SCMPEXPDP)
4519 (match_operand:SI 3 "zero_constant" "j")))]
4521 "xscmpexpdp %0,%x1,%x2"
4522 [(set_attr "type" "fpcompare")])
4524 ;; VSX Scalar Compare Exponents Quad-Precision
4525 (define_expand "xscmpexpqp_<code>_<mode>"
4529 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4530 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4531 UNSPEC_VSX_SCMPEXPQP)
4533 (set (match_operand:SI 0 "register_operand" "=r")
4534 (CMP_TEST:SI (match_dup 3)
4538 operands[3] = gen_reg_rtx (CCFPmode);
4541 (define_insn "*xscmpexpqp"
4542 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4544 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4545 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4546 UNSPEC_VSX_SCMPEXPQP)
4547 (match_operand:SI 3 "zero_constant" "j")))]
4549 "xscmpexpqp %0,%1,%2"
4550 [(set_attr "type" "fpcompare")])
4552 ;; VSX Scalar Test Data Class Quad-Precision
4553 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4554 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4555 ;; setting the eq bit if any of the conditions tested by operand 2
4556 ;; are satisfied, and clearing the gt and unordered bits to zero.)
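;; A hedged source-level sketch; the built-in name comes from the comment
;; above, while the <altivec.h> requirement and the particular condition mask
;; (an arbitrary illustrative value) are assumptions:
;;
;;	#include <altivec.h>
;;
;;	int
;;	test_class_qp (__ieee128 x)
;;	{
;;	  return scalar_test_data_class (x, 0x40);
;;	}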
4557 (define_expand "xststdcqp_<mode>"
4561 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4562 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4565 (set (match_operand:SI 0 "register_operand" "=r")
4566 (eq:SI (match_dup 3)
4570 operands[3] = gen_reg_rtx (CCFPmode);
4573 ;; VSX Scalar Test Data Class Double- and Single-Precision
4574 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4575 ;; if any of the conditions tested by operand 2 are satisfied.
4576 ;; The gt and unordered bits are cleared to zero.)
4577 (define_expand "xststdc<sd>p"
4581 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4582 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4585 (set (match_operand:SI 0 "register_operand" "=r")
4586 (eq:SI (match_dup 3)
4590 operands[3] = gen_reg_rtx (CCFPmode);
4591 operands[4] = CONST0_RTX (SImode);
4594 ;; The VSX Scalar Test Negative Quad-Precision
4595 (define_expand "xststdcnegqp_<mode>"
4599 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4603 (set (match_operand:SI 0 "register_operand" "=r")
4604 (lt:SI (match_dup 2)
4608 operands[2] = gen_reg_rtx (CCFPmode);
4611 ;; The VSX Scalar Test Negative Double- and Single-Precision
4612 (define_expand "xststdcneg<sd>p"
4616 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4620 (set (match_operand:SI 0 "register_operand" "=r")
4621 (lt:SI (match_dup 2)
4625 operands[2] = gen_reg_rtx (CCFPmode);
4626 operands[3] = CONST0_RTX (SImode);
4629 (define_insn "*xststdcqp_<mode>"
4630 [(set (match_operand:CCFP 0 "" "=y")
4633 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4634 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4638 "xststdcqp %0,%1,%2"
4639 [(set_attr "type" "fpcompare")])
4641 (define_insn "*xststdc<sd>p"
4642 [(set (match_operand:CCFP 0 "" "=y")
4644 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4645 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4647 (match_operand:SI 3 "zero_constant" "j")))]
4649 "xststdc<sd>p %0,%x1,%2"
4650 [(set_attr "type" "fpcompare")])
4652 ;; VSX Vector Extract Exponent Double and Single Precision
4653 (define_insn "xvxexp<sd>p"
4654 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4656 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4659 "xvxexp<sd>p %x0,%x1"
4660 [(set_attr "type" "vecsimple")])
4662 ;; VSX Vector Extract Significand Double and Single Precision
4663 (define_insn "xvxsig<sd>p"
4664 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4666 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4669 "xvxsig<sd>p %x0,%x1"
4670 [(set_attr "type" "vecsimple")])
4672 ;; VSX Vector Insert Exponent Double and Single Precision
4673 (define_insn "xviexp<sd>p"
4674 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4676 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4677 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4680 "xviexp<sd>p %x0,%x1,%x2"
4681 [(set_attr "type" "vecsimple")])
4683 ;; VSX Vector Test Data Class Double and Single Precision
4684 ;; The corresponding elements of the result vector are all ones
4685 ;; if any of the conditions tested by operand 3 are satisfied.
4686 (define_insn "xvtstdc<sd>p"
4687 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4689 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4690 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4691 UNSPEC_VSX_VTSTDC))]
4693 "xvtstdc<sd>p %x0,%x1,%2"
4694 [(set_attr "type" "vecsimple")])
4696 ;; ISA 3.0 String Operations Support
4698 ;; Compare vectors producing a vector result and a predicate, setting CR6
4699 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
4700 ;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no
4701 ;; need to match v4sf, v2df, or v2di modes because those are expanded
4702 ;; to use Power8 instructions.
4703 (define_insn "*vsx_ne_<mode>_p"
4704 [(set (reg:CC CR6_REGNO)
4706 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4707 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4709 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4710 (ne:VSX_EXTRACT_I (match_dup 1)
4713 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4714 [(set_attr "type" "vecsimple")])
4716 (define_insn "*vector_nez_<mode>_p"
4717 [(set (reg:CC CR6_REGNO)
4718 (unspec:CC [(unspec:VI
4719 [(match_operand:VI 1 "gpc_reg_operand" "v")
4720 (match_operand:VI 2 "gpc_reg_operand" "v")]
4723 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4724 (unspec:VI [(match_dup 1)
4728 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4729 [(set_attr "type" "vecsimple")])
4731 ;; Return first position of match between vectors using natural order
4732 ;; for both LE and BE execution modes.
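;; A hedged sketch of a source-level use; the overloaded built-in name
;; vec_first_match_index is assumed to be the usual way to reach this expander:
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	first_match (vector unsigned char a, vector unsigned char b)
;;	{
;;	  return vec_first_match_index (a, b);
;;	}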
4733 (define_expand "first_match_index_<mode>"
4734 [(match_operand:SI 0 "register_operand")
4735 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4736 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4737 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4742 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4743 rtx not_result = gen_reg_rtx (<MODE>mode);
4745 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4747 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4749 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4751 if (<MODE>mode == V16QImode)
4753 if (!BYTES_BIG_ENDIAN)
4754 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4756 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4760 rtx tmp = gen_reg_rtx (SImode);
4761 if (!BYTES_BIG_ENDIAN)
4762 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4764 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4765 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4770 ;; Return first position of match between vectors or end of string (EOS) using
4771 ;; natural element order for both LE and BE execution modes.
4772 (define_expand "first_match_or_eos_index_<mode>"
4773 [(match_operand:SI 0 "register_operand")
4774 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4775 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4776 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4780 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4781 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4782 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4783 rtx and_result = gen_reg_rtx (<MODE>mode);
4784 rtx result = gen_reg_rtx (<MODE>mode);
4785 rtx vzero = gen_reg_rtx (<MODE>mode);
4787 /* Vector with zeros in elements that correspond to zeros in operands. */
4788 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4789 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4790 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4791 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4793 /* Vector with ones in elements that do not match.  */
4794 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4797 /* Create vector with ones in elements where there was a zero in one of
4798 the source elements or where the elements match.  */
4799 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4800 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4802 if (<MODE>mode == V16QImode)
4804 if (!BYTES_BIG_ENDIAN)
4805 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4807 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4811 rtx tmp = gen_reg_rtx (SImode);
4812 if (!BYTES_BIG_ENDIAN)
4813 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4815 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4816 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4821 ;; Return first position of mismatch between vectors using natural
4822 ;; element order for both LE and BE execution modes.
4823 (define_expand "first_mismatch_index_<mode>"
4824 [(match_operand:SI 0 "register_operand")
4825 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4826 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4827 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4831 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4833 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4835 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4837 if (<MODE>mode == V16QImode)
4839 if (!BYTES_BIG_ENDIAN)
4840 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4842 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4846 rtx tmp = gen_reg_rtx (SImode);
4847 if (!BYTES_BIG_ENDIAN)
4848 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4850 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4851 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4856 ;; Return first position of mismatch between vectors or end of string (EOS)
4857 ;; using natural element order for both LE and BE execution modes.
4858 (define_expand "first_mismatch_or_eos_index_<mode>"
4859 [(match_operand:SI 0 "register_operand")
4860 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4861 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4862 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4866 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4867 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4868 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4869 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4870 rtx and_result = gen_reg_rtx (<MODE>mode);
4871 rtx result = gen_reg_rtx (<MODE>mode);
4872 rtx vzero = gen_reg_rtx (<MODE>mode);
4874 /* Vector with zeros in elements that correspond to zeros in operands. */
4875 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4877 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4878 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4879 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4881 /* Vector with ones in elements that match.  */
4882 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4884 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4886 /* Create vector with ones in elements where there was a zero in one of
4887 the source elements or the elements did not match. */
4888 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4889 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4891 if (<MODE>mode == V16QImode)
4893 if (!BYTES_BIG_ENDIAN)
4894 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4896 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4900 rtx tmp = gen_reg_rtx (SImode);
4901 if (!BYTES_BIG_ENDIAN)
4902 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4904 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4905 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4910 ;; Load VSX Vector with Length
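;; A hedged source-level sketch (assuming <altivec.h> and the vec_xl_len
;; built-in) of a length-controlled load that should expand through lxvl:
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	vector unsigned char
;;	load_prefix (unsigned char *p, size_t n)
;;	{
;;	  return vec_xl_len (p, n);	/* load only the first n bytes */
;;	}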
4911 (define_expand "lxvl"
4913 (ashift:DI (match_operand:DI 2 "register_operand")
4915 (set (match_operand:V16QI 0 "vsx_register_operand")
4917 [(match_operand:DI 1 "gpc_reg_operand")
4918 (mem:V16QI (match_dup 1))
4921 "TARGET_P9_VECTOR && TARGET_64BIT"
4923 operands[3] = gen_reg_rtx (DImode);
4926 (define_insn "*lxvl"
4927 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4929 [(match_operand:DI 1 "gpc_reg_operand" "b")
4930 (mem:V16QI (match_dup 1))
4931 (match_operand:DI 2 "register_operand" "r")]
4933 "TARGET_P9_VECTOR && TARGET_64BIT"
4935 [(set_attr "type" "vecload")])
4937 (define_insn "lxvll"
4938 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4939 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4940 (mem:V16QI (match_dup 1))
4941 (match_operand:DI 2 "register_operand" "r")]
4945 [(set_attr "type" "vecload")])
4947 ;; Expand for builtin xl_len_r
4948 (define_expand "xl_len_r"
4949 [(match_operand:V16QI 0 "vsx_register_operand")
4950 (match_operand:DI 1 "register_operand")
4951 (match_operand:DI 2 "register_operand")]
4954 rtx shift_mask = gen_reg_rtx (V16QImode);
4955 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4956 rtx tmp = gen_reg_rtx (DImode);
4958 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4959 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4960 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4961 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4966 (define_insn "stxvll"
4967 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4968 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4969 (mem:V16QI (match_dup 1))
4970 (match_operand:DI 2 "register_operand" "r")]
4974 [(set_attr "type" "vecstore")])
4976 ;; Store VSX Vector with Length
4977 (define_expand "stxvl"
4979 (ashift:DI (match_operand:DI 2 "register_operand")
4981 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
4983 [(match_operand:V16QI 0 "vsx_register_operand")
4984 (mem:V16QI (match_dup 1))
4987 "TARGET_P9_VECTOR && TARGET_64BIT"
4989 operands[3] = gen_reg_rtx (DImode);
4992 (define_insn "*stxvl"
4993 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4995 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4996 (mem:V16QI (match_dup 1))
4997 (match_operand:DI 2 "register_operand" "r")]
4999 "TARGET_P9_VECTOR && TARGET_64BIT"
5001 [(set_attr "type" "vecstore")])
5003 ;; Expand for builtin xst_len_r
5004 (define_expand "xst_len_r"
5005 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5006 (match_operand:DI 1 "register_operand" "b")
5007 (match_operand:DI 2 "register_operand" "r")]
5010 rtx shift_mask = gen_reg_rtx (V16QImode);
5011 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5012 rtx tmp = gen_reg_rtx (DImode);
5014 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5015 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5017 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5018 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5022 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5023 (define_insn "vcmpneb"
5024 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5026 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5027 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5030 [(set_attr "type" "vecsimple")])
5032 ;; Vector Compare Not Equal or Zero Byte
5033 (define_insn "vcmpnezb"
5034 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5036 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5037 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5041 [(set_attr "type" "vecsimple")])
5043 ;; Vector Compare Not Equal or Zero Byte predicate or record-form
5044 (define_insn "vcmpnezb_p"
5045 [(set (reg:CC CR6_REGNO)
5047 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5048 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5050 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5056 "vcmpnezb. %0,%1,%2"
5057 [(set_attr "type" "vecsimple")])
5059 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5060 (define_insn "vcmpneh"
5061 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5063 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5064 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5067 [(set_attr "type" "vecsimple")])
5069 ;; Vector Compare Not Equal or Zero Half Word
5070 (define_insn "vcmpnezh"
5071 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5072 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5073 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5077 [(set_attr "type" "vecsimple")])
5079 ;; Vector Compare Not Equal Word (specified/not+eq:)
5080 (define_insn "vcmpnew"
5081 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5083 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5084 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5087 [(set_attr "type" "vecsimple")])
5089 ;; Vector Compare Not Equal or Zero Word
5090 (define_insn "vcmpnezw"
5091 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5092 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5093 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5097 [(set_attr "type" "vecsimple")])
5099 ;; Vector Count Leading Zero Least-Significant Bits Byte
5100 (define_insn "vclzlsbb_<mode>"
5101 [(set (match_operand:SI 0 "register_operand" "=r")
5103 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5107 [(set_attr "type" "vecsimple")])
5109 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5110 (define_insn "vctzlsbb_<mode>"
5111 [(set (match_operand:SI 0 "register_operand" "=r")
5113 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5117 [(set_attr "type" "vecsimple")])
5119 ;; Vector Extract Unsigned Byte Left-Indexed
5120 (define_insn "vextublx"
5121 [(set (match_operand:SI 0 "register_operand" "=r")
5123 [(match_operand:SI 1 "register_operand" "r")
5124 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5128 [(set_attr "type" "vecsimple")])
5130 ;; Vector Extract Unsigned Byte Right-Indexed
5131 (define_insn "vextubrx"
5132 [(set (match_operand:SI 0 "register_operand" "=r")
5134 [(match_operand:SI 1 "register_operand" "r")
5135 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5139 [(set_attr "type" "vecsimple")])
5141 ;; Vector Extract Unsigned Half Word Left-Indexed
5142 (define_insn "vextuhlx"
5143 [(set (match_operand:SI 0 "register_operand" "=r")
5145 [(match_operand:SI 1 "register_operand" "r")
5146 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5150 [(set_attr "type" "vecsimple")])
5152 ;; Vector Extract Unsigned Half Word Right-Indexed
5153 (define_insn "vextuhrx"
5154 [(set (match_operand:SI 0 "register_operand" "=r")
5156 [(match_operand:SI 1 "register_operand" "r")
5157 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5161 [(set_attr "type" "vecsimple")])
5163 ;; Vector Extract Unsigned Word Left-Indexed
5164 (define_insn "vextuwlx"
5165 [(set (match_operand:SI 0 "register_operand" "=r")
5167 [(match_operand:SI 1 "register_operand" "r")
5168 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5172 [(set_attr "type" "vecsimple")])
5174 ;; Vector Extract Unsigned Word Right-Indexed
5175 (define_insn "vextuwrx"
5176 [(set (match_operand:SI 0 "register_operand" "=r")
5178 [(match_operand:SI 1 "register_operand" "r")
5179 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5183 [(set_attr "type" "vecsimple")])
5185 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5186 ;; endian version needs to adjust the byte number, and the V4SI element in
5188 (define_insn "extract4b"
5189 [(set (match_operand:V2DI 0 "vsx_register_operand")
5190 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5191 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5192 UNSPEC_XXEXTRACTUW))]
5195 if (!BYTES_BIG_ENDIAN)
5196 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5198 return "xxextractuw %x0,%x1,%2";
5201 (define_expand "insert4b"
5202 [(set (match_operand:V16QI 0 "vsx_register_operand")
5203 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5204 (match_operand:V16QI 2 "vsx_register_operand")
5205 (match_operand:QI 3 "const_0_to_12_operand")]
5209 if (!BYTES_BIG_ENDIAN)
5211 rtx op1 = operands[1];
5212 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5213 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5214 operands[1] = v4si_tmp;
5215 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5219 (define_insn "*insert4b_internal"
5220 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5221 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5222 (match_operand:V16QI 2 "vsx_register_operand" "0")
5223 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5226 "xxinsertw %x0,%x1,%3"
5227 [(set_attr "type" "vecperm")])
5230 ;; Generate a vector of four 32-bit floats extracted from the left four
5231 ;; elements of an eight-element vector of 16-bit floats.
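;; A hedged sketch of a source-level use; the built-in name
;; vec_extract_fp32_from_shorth is assumed here:
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	upconvert_high (vector unsigned short packed_f16)
;;	{
;;	  return vec_extract_fp32_from_shorth (packed_f16);
;;	}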
5232 (define_expand "vextract_fp_from_shorth"
5233 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5234 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5235 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5239 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5240 int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5243 rtx mask = gen_reg_rtx (V16QImode);
5244 rtx tmp = gen_reg_rtx (V16QImode);
5247 for (i = 0; i < 16; i++)
5248 if (!BYTES_BIG_ENDIAN)
5249 rvals[i] = GEN_INT (vals_le[i]);
5251 rvals[i] = GEN_INT (vals_be[i]);
5253 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5254 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5255 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5256 conversion instruction. */
5257 v = gen_rtvec_v (16, rvals);
5258 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5259 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5260 operands[1], mask));
5261 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5265 ;; Generate a vector of four 32-bit floats extracted from the right four
5266 ;; elements of an eight-element vector of 16-bit floats.
5267 (define_expand "vextract_fp_from_shortl"
5268 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5269 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5270 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5273 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5274 int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5278 rtx mask = gen_reg_rtx (V16QImode);
5279 rtx tmp = gen_reg_rtx (V16QImode);
5282 for (i = 0; i < 16; i++)
5283 if (!BYTES_BIG_ENDIAN)
5284 rvals[i] = GEN_INT (vals_le[i]);
5286 rvals[i] = GEN_INT (vals_be[i]);
5288 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5289 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5290 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5291 conversion instruction. */
5292 v = gen_rtvec_v (16, rvals);
5293 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5294 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5295 operands[1], mask));
5296 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5300 ;; Support for ISA 3.0 vector byte reverse
5302 ;; Swap all bytes within a vector
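;; A hedged source-level sketch of a full 128-bit byte reversal; the vec_revb
;; built-in and its vector __int128 form are assumptions, not stated here:
;;
;;	#include <altivec.h>
;;
;;	vector unsigned __int128
;;	reverse_bytes (vector unsigned __int128 v)
;;	{
;;	  return vec_revb (v);
;;	}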
5303 (define_insn "p9_xxbrq_v1ti"
5304 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5305 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5308 [(set_attr "type" "vecperm")])
5310 (define_expand "p9_xxbrq_v16qi"
5311 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5312 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5315 rtx op0 = gen_reg_rtx (V1TImode);
5316 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5317 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5318 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5322 ;; Swap all bytes in each 64-bit element
5323 (define_insn "p9_xxbrd_v2di"
5324 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5325 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5328 [(set_attr "type" "vecperm")])
5330 (define_expand "p9_xxbrd_v2df"
5331 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5332 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5335 rtx op0 = gen_reg_rtx (V2DImode);
5336 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5337 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5338 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5342 ;; Swap all bytes in each 32-bit element
5343 (define_insn "p9_xxbrw_v4si"
5344 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5345 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5348 [(set_attr "type" "vecperm")])
5350 (define_expand "p9_xxbrw_v4sf"
5351 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5352 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5355 rtx op0 = gen_reg_rtx (V4SImode);
5356 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5357 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5358 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5362 ;; Swap all bytes in each element of vector
5363 (define_expand "revb_<mode>"
5364 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5365 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5368 if (TARGET_P9_VECTOR)
5369 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5372 /* Want to have the elements in reverse order relative
5373 to the endian mode in use, i.e. in LE mode, put elements in BE order.  */
5375 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5376 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5383 ;; Reversing bytes in vector char is just a NOP.
5384 (define_expand "revb_v16qi"
5385 [(set (match_operand:V16QI 0 "vsx_register_operand")
5386 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5389 emit_move_insn (operands[0], operands[1]);
5393 ;; Swap all bytes in each 16-bit element
5394 (define_insn "p9_xxbrh_v8hi"
5395 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5396 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5399 [(set_attr "type" "vecperm")])
5402 ;; Operand numbers for the following peephole2
5404 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5405 (SFBOOL_TMP_VSX 1) ;; vector temporary
5406 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5407 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5408 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5409 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5410 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5411 (SFBOOL_SHL_D 7) ;; shift left dest
5412 (SFBOOL_SHL_A 8) ;; shift left arg
5413 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5414 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5415 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5416 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5417 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5419 ;; Attempt to optimize some common GLIBC operations using logical operations to
5420 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5421 ;; after macro expansion that looks like:
5426 ;; } ieee_float_shape_type;
5432 ;; ieee_float_shape_type gf_u;
5433 ;; gf_u.value = (t1);
5434 ;; (is) = gf_u.word;
5438 ;; ieee_float_shape_type sf_u;
5439 ;; sf_u.word = (is & 0xfffff000);
5440 ;; (t1) = sf_u.value;
5444 ;; This would result in two direct move operations (convert to memory format,
5445 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5446 ;; scalar format). With this peephole, we eliminate the direct move to the
5447 ;; GPR, and instead move the integer mask value to the vector register after a
5448 ;; shift and do the VSX logical operation.
5450 ;; The insns for dealing with SFmode in GPR registers look like:
5451 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5453 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5455 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5457 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5459 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5461 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5464 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5465 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5467 ;; MFVSRWZ (aka zero_extend)
5468 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5470 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5472 ;; AND/IOR/XOR operation on int
5473 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5474 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5475 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5478 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5479 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5483 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5484 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5486 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5487 /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5488    to compare registers when the modes are different.  */
5489 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5490 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5491 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5492 && (REG_P (operands[SFBOOL_BOOL_A2])
5493 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5494 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5495 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5496 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5497 || (REG_P (operands[SFBOOL_BOOL_A2])
5498 && REGNO (operands[SFBOOL_MFVSR_D])
5499 == REGNO (operands[SFBOOL_BOOL_A2])))
5500 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5501 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5502 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5503 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5504 [(set (match_dup SFBOOL_TMP_GPR)
5505 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5508 (set (match_dup SFBOOL_TMP_VSX_DI)
5509 (match_dup SFBOOL_TMP_GPR))
5511 (set (match_dup SFBOOL_MTVSR_D_V4SF)
5512 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5513 (match_dup SFBOOL_TMP_VSX)))]
5515 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5516 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5517 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5518 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5519 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5520 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5522 if (CONST_INT_P (bool_a2))
5524 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5525 emit_move_insn (tmp_gpr, bool_a2);
5526 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5530 int regno_bool_a1 = REGNO (bool_a1);
5531 int regno_bool_a2 = REGNO (bool_a2);
5532 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5533 ? regno_bool_a2 : regno_bool_a1);
5534 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5537 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5538 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5539 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);