gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Code iterator over the comparison codes eq, lt, gt and unordered
  22 (define_code_iterator CMP_TEST [eq lt gt unordered])
  23
  24 ;; Mode attribute for vector floate and floato conversions
  25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
  26
  27 ;; Iterator for both scalar and vector floating point types supported by VSX
  28 (define_mode_iterator VSX_B [DF V4SF V2DF])
  29
  30 ;; Iterator for the two vector types with 64-bit elements
  31 (define_mode_iterator VSX_D [V2DF V2DI])
  32
  33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
  34 ;; types that go in a single vector register.
  35 (define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
  36                                   (TF   "FLOAT128_VECTOR_P (TFmode)")
  37                                   TI
  38                                   V1TI])
  39
  40 ;; Iterator for 128-bit integer types that go in a single vector register.
  41 (define_mode_iterator VSX_TI [TI V1TI])
  42
  43 ;; Iterator for the two vector types with 32-bit elements
  44 (define_mode_iterator VSX_W [V4SF V4SI])
  45
  46 ;; Iterator for the DF types (vector V2DF and scalar DF)
  47 (define_mode_iterator VSX_DF [V2DF DF])
  48
  49 ;; Iterator for vector floating point types supported by VSX
  50 (define_mode_iterator VSX_F [V4SF V2DF])
  51
  52 ;; Iterator for logical types supported by VSX
  53 (define_mode_iterator VSX_L [V16QI
  54                              V8HI
  55                              V4SI
  56                              V2DI
  57                              V4SF
  58                              V2DF
  59                              V1TI
  60                              TI
  61                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  62                              (TF        "FLOAT128_VECTOR_P (TFmode)")])
  63
  64 ;; Iterator for memory moves.  This is the same set of modes as VSX_L, with
     ;; TI listed last.
  65 (define_mode_iterator VSX_M [V16QI
  66                              V8HI
  67                              V4SI
  68                              V2DI
  69                              V4SF
  70                              V2DF
  71                              V1TI
  72                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  73                              (TF        "FLOAT128_VECTOR_P (TFmode)")
  74                              TI])
  75
     ;; NOTE(review): presumably the element-size letter (h/w/d/q) appended to
     ;; the xxbr<x> byte-reverse mnemonics for each mode -- confirm against the
     ;; patterns that use <VSX_XXBR>.
  76 (define_mode_attr VSX_XXBR  [(V8HI  "h")
  77                              (V4SI  "w")
  78                              (V4SF  "w")
  79                              (V2DF  "d")
  80                              (V2DI  "d")
  81                              (V1TI  "q")])
  82
  83 ;; Map into the appropriate load/store name based on the type
  84 (define_mode_attr VSm  [(V16QI "vw4")
  85                         (V8HI  "vw4")
  86                         (V4SI  "vw4")
  87                         (V4SF  "vw4")
  88                         (V2DF  "vd2")
  89                         (V2DI  "vd2")
  90                         (DF    "d")
  91                         (TF    "vd2")
  92                         (KF    "vd2")
  93                         (V1TI  "vd2")
  94                         (TI    "vd2")])
  95
  96 ;; Map into the appropriate suffix based on the type
  97 (define_mode_attr VSs   [(V16QI "sp")
  98                          (V8HI  "sp")
  99                          (V4SI  "sp")
 100                          (V4SF  "sp")
 101                          (V2DF  "dp")
 102                          (V2DI  "dp")
 103                          (DF    "dp")
 104                          (SF    "sp")
 105                          (TF    "dp")
 106                          (KF    "dp")
 107                          (V1TI  "dp")
 108                          (TI    "dp")])
 109
 110 ;; Map the preferred register class (constraint) used for each mode
 111 (define_mode_attr VSr   [(V16QI "v")
 112                          (V8HI  "v")
 113                          (V4SI  "v")
 114                          (V4SF  "wf")
 115                          (V2DI  "wd")
 116                          (V2DF  "wd")
 117                          (DI    "wi")
 118                          (DF    "ws")
 119                          (SF    "ww")
 120                          (TF    "wp")
 121                          (KF    "wq")
 122                          (V1TI  "v")
 123                          (TI    "wt")])
 124
 125 ;; Map the register class used for float<->int conversions (floating point side)
 126 ;; VSr2 is the preferred register class, VSr3 is any register class that will
 127 ;; hold the data
 128 (define_mode_attr VSr2  [(V2DF  "wd")
 129                          (V4SF  "wf")
 130                          (DF    "ws")
 131                          (SF    "ww")
 132                          (DI    "wi")
 133                          (KF    "wq")
 134                          (TF    "wp")])
 135
 136 (define_mode_attr VSr3  [(V2DF  "wa")
 137                          (V4SF  "wa")
 138                          (DF    "ws")
 139                          (SF    "ww")
 140                          (DI    "wi")
 141                          (KF    "wq")
 142                          (TF    "wp")])
 143
 144 ;; Map the register class for sp<->dp float conversions, destination
 145 (define_mode_attr VSr4  [(SF    "ws")
 146                          (DF    "f")
 147                          (V2DF  "wd")
 148                          (V4SF  "v")])
 149
 150 ;; Map the register class for sp<->dp float conversions, source
 151 (define_mode_attr VSr5  [(SF    "ws")
 152                          (DF    "f")
 153                          (V2DF  "v")
 154                          (V4SF  "wd")])
 155
 156 ;; The VSX register class that a type can occupy, even if it is not the
 157 ;; preferred register class (VSr is the preferred register class that will get
 158 ;; allocated first).
 159 (define_mode_attr VSa   [(V16QI "wa")
 160                          (V8HI  "wa")
 161                          (V4SI  "wa")
 162                          (V4SF  "wa")
 163                          (V2DI  "wa")
 164                          (V2DF  "wa")
 165                          (DI    "wi")
 166                          (DF    "ws")
 167                          (SF    "ww")
 168                          (V1TI  "wa")
 169                          (TI    "wt")
 170                          (TF    "wp")
 171                          (KF    "wq")])
 172
 173 ;; A mode attribute to disparage use of GPR registers, except for scalar
 174 ;; integer modes.
     ;; The attribute is itself named "??r"; every mode listed maps to "??r"
     ;; except TI, which maps to plain "r".
 175 (define_mode_attr ??r   [(V16QI "??r")
 176                          (V8HI  "??r")
 177                          (V4SI  "??r")
 178                          (V4SF  "??r")
 179                          (V2DI  "??r")
 180                          (V2DF  "??r")
 181                          (V1TI  "??r")
 182                          (KF    "??r")
 183                          (TF    "??r")
 184                          (TI    "r")])
 185
 186 ;; Same size integer type for floating point data (lower case spelling)
 187 (define_mode_attr VSi [(V4SF  "v4si")
 188                        (V2DF  "v2di")
 189                        (DF    "di")])
 190
     ;; The same mapping as VSi, with the mode name spelled in upper case
 191 (define_mode_attr VSI [(V4SF  "V4SI")
 192                        (V2DF  "V2DI")
 193                        (DF    "DI")])
 194
 195 ;; Word size for same size conversion
 196 (define_mode_attr VSc [(V4SF "w")
 197                        (V2DF "d")
 198                        (DF   "d")])
 199
 200 ;; Map into either s or v, depending on whether this is a scalar or vector
 201 ;; operation
 202 (define_mode_attr VSv   [(V16QI "v")
 203                          (V8HI  "v")
 204                          (V4SI  "v")
 205                          (V4SF  "v")
 206                          (V2DI  "v")
 207                          (V2DF  "v")
 208                          (V1TI  "v")
 209                          (DF    "s")
 210                          (KF    "v")])
 211
 212 ;; Appropriate type for add ops (and other simple FP ops)
 213 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 214                                  (V4SF "vecfloat")
 215                                  (DF   "fp")])
 216
 217 ;; Appropriate type for multiply ops
 218 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 219                                  (V4SF "vecfloat")
 220                                  (DF   "dmul")])
 221
 222 ;; Appropriate type for divide ops.
 223 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 224                                  (V4SF "vecfdiv")
 225                                  (DF   "ddiv")])
 226
 227 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 228 ;; the scalar sqrt
 229 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 230                                  (V4SF "ssqrt")
 231                                  (DF   "dsqrt")])
 232
 233 ;; Iterator and modes for sp<->dp conversions
 234 ;; Because scalar SF values are represented internally as double, use the
 235 ;; V4SF type to represent this rather than SF.
 236 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 237
 238 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 239                                (V4SF    "V2DF")
 240                                (V2DF    "V4SF")])
 241
 242 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 243                                 (V4SF   "xvcvspdp")
 244                                 (V2DF   "xvcvdpsp")])
 245
 246 (define_mode_attr VS_spdp_type [(DF     "fp")
 247                                 (V4SF   "vecdouble")
 248                                 (V2DF   "vecdouble")])
 249
 250 ;; Map the scalar mode for a vector type
 251 (define_mode_attr VS_scalar [(V1TI      "TI")
 252                              (V2DF      "DF")
 253                              (V2DI      "DI")
 254                              (V4SF      "SF")
 255                              (V4SI      "SI")
 256                              (V8HI      "HI")
 257                              (V16QI     "QI")])
 258
 259 ;; Map to a double-sized vector mode
 260 (define_mode_attr VS_double [(V4SI      "V8SI")
 261                              (V4SF      "V8SF")
 262                              (V2DI      "V4DI")
 263                              (V2DF      "V4DF")
 264                              (V1TI      "V2TI")])
 265
 266 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 267 ;; to/from gprs
 268 (define_mode_attr VS_64dm [(V2DF        "wk")
 269                            (V2DI        "wj")])
 270
 271 ;; Map register class for 64-bit element in 128-bit vector for normal register
 272 ;; to register moves
 273 (define_mode_attr VS_64reg [(V2DF       "ws")
 274                             (V2DI       "wi")])
 275
 276 ;; Iterators for loading constants with xxspltib
 277 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 278 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 279
 280 ;; Vector reverse byte modes
 281 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
 282
 283 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
 284 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
 285 ;; done on ISA 2.07 and not just ISA 3.0.
 286 (define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
 287 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
 288
     ;; Element width letter (b, h or w) for each small integer vector mode
 289 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
 290                                      (V8HI "h")
 291                                      (V4SI "w")])
 292
 293 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
 294 ;; insert to validate the operand number.
 295 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
 296                                          (V8HI  "const_0_to_7_operand")
 297                                          (V4SI  "const_0_to_3_operand")])
 298
 299 ;; Mode attribute to give the constraint for vector extract and insert
 300 ;; operations.
 301 (define_mode_attr VSX_EX [(V16QI "v")
 302                           (V8HI  "v")
 303                           (V4SI  "wa")])
 304
 305 ;; Mode iterator for binary floating types other than double to
 306 ;; optimize convert to that floating point type from an extract
 307 ;; of an integer type
 308 (define_mode_iterator VSX_EXTRACT_FL [SF
 309                                       (IF "FLOAT128_2REG_P (IFmode)")
 310                                       (KF "TARGET_FLOAT128_HW")
 311                                       (TF "FLOAT128_2REG_P (TFmode)
 312                                            || (FLOAT128_IEEE_P (TFmode)
 313                                                && TARGET_FLOAT128_HW)")])
 314
 315 ;; Mode iterator for binary floating types that have a direct conversion
 316 ;; from 64-bit integer to floating point
 317 (define_mode_iterator FL_CONV [SF
 318                                DF
 319                                (KF "TARGET_FLOAT128_HW")
 320                                (TF "TARGET_FLOAT128_HW
 321                                     && FLOAT128_IEEE_P (TFmode)")])
 322
 323 ;; Iterator for the two short vector types to do a splat from an integer
 324 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 325
 326 ;; Mode attribute to give the count for the splat instruction to splat
 327 ;; the value in the 64-bit integer slot
 328 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
 329
 330 ;; Mode attribute to give the suffix for the splat instruction
 331 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
 332
 333 ;; Constants for creating unspecs.  They are members of the "unspec" C enum.
 334 (define_c_enum "unspec"
 335   [UNSPEC_VSX_CONCAT
 336    UNSPEC_VSX_CVDPSXWS
 337    UNSPEC_VSX_CVDPUXWS
 338    UNSPEC_VSX_CVSPDP
 339    UNSPEC_VSX_CVHPSP
 340    UNSPEC_VSX_CVSPDPN
 341    UNSPEC_VSX_CVDPSPN
 342    UNSPEC_VSX_CVSXWDP
 343    UNSPEC_VSX_CVUXWDP
 344    UNSPEC_VSX_CVSXDSP
 345    UNSPEC_VSX_CVUXDSP
 346    UNSPEC_VSX_CVSPSXDS
 347    UNSPEC_VSX_CVSPUXDS
 348    UNSPEC_VSX_CVSXWSP
 349    UNSPEC_VSX_CVUXWSP
 350    UNSPEC_VSX_FLOAT2
 351    UNSPEC_VSX_UNS_FLOAT2
 352    UNSPEC_VSX_FLOATE
 353    UNSPEC_VSX_UNS_FLOATE
 354    UNSPEC_VSX_FLOATO
 355    UNSPEC_VSX_UNS_FLOATO
 356    UNSPEC_VSX_TDIV
 357    UNSPEC_VSX_TSQRT
 358    UNSPEC_VSX_SET
 359    UNSPEC_VSX_ROUND_I
 360    UNSPEC_VSX_ROUND_IC
 361    UNSPEC_VSX_SLDWI
 362    UNSPEC_VSX_XXPERM
 363
 364    UNSPEC_VSX_XXSPLTW
 365    UNSPEC_VSX_XXSPLTD
 366    UNSPEC_VSX_DIVSD
 367    UNSPEC_VSX_DIVUD
 368    UNSPEC_VSX_MULSD
 369    UNSPEC_VSX_XVCVSXDDP
 370    UNSPEC_VSX_XVCVUXDDP
 371    UNSPEC_VSX_XVCVDPSXDS
 372    UNSPEC_VSX_XVCDPSP
 373    UNSPEC_VSX_XVCVDPUXDS
 374    UNSPEC_VSX_SIGN_EXTEND
 375    UNSPEC_VSX_XVCVSPSXWS
 376    UNSPEC_VSX_XVCVSPSXDS
 377    UNSPEC_VSX_VSLO
 378    UNSPEC_VSX_EXTRACT
 379    UNSPEC_VSX_SXEXPDP
 380    UNSPEC_VSX_SXSIG
 381    UNSPEC_VSX_SIEXPDP
 382    UNSPEC_VSX_SIEXPQP
 383    UNSPEC_VSX_SCMPEXPDP
 384    UNSPEC_VSX_STSTDC
 385    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
 386    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
 387    UNSPEC_VSX_VXEXP
 388    UNSPEC_VSX_VXSIG
 389    UNSPEC_VSX_VIEXP
 390    UNSPEC_VSX_VTSTDC
 391    UNSPEC_VSX_VEC_INIT
 392    UNSPEC_VSX_VSIGNED2
 393
 394    UNSPEC_LXVL
 395    UNSPEC_LXVLL
 396    UNSPEC_LVSL_REG
 397    UNSPEC_LVSR_REG
 398    UNSPEC_STXVL
 399    UNSPEC_STXVLL
 400    UNSPEC_XL_LEN_R
 401    UNSPEC_XST_LEN_R
 402
 403    UNSPEC_VCLZLSBB
 404    UNSPEC_VCTZLSBB
 405    UNSPEC_VEXTUBLX
 406    UNSPEC_VEXTUHLX
 407    UNSPEC_VEXTUWLX
 408    UNSPEC_VEXTUBRX
 409    UNSPEC_VEXTUHRX
 410    UNSPEC_VEXTUWRX
 411    UNSPEC_VCMPNEB
 412    UNSPEC_VCMPNEZB
 413    UNSPEC_VCMPNEH
 414    UNSPEC_VCMPNEZH
 415    UNSPEC_VCMPNEW
 416    UNSPEC_VCMPNEZW
 417    UNSPEC_XXEXTRACTUW
 418    UNSPEC_XXINSERTW
 419    UNSPEC_VSX_FIRST_MATCH_INDEX
 420    UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
 421    UNSPEC_VSX_FIRST_MISMATCH_INDEX
 422    UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
 423   ])
 424
 425 ;; VSX moves
 426
 427 ;; The patterns for LE permuted loads and stores come before the general
 428 ;; VSX moves so they match first.
     ;;
     ;; Load of a two-element (VSX_D) vector on little endian VSX targets when
     ;; TARGET_P9_VECTOR is off.  The "#" template and the "&& 1" split condition
     ;; mean the insn is always handed to the splitter, which normally produces
     ;; two element swaps: memory into operand 2, then operand 2 into operand 0.
 429 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 430   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
 431         (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
 432   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 433   "#"
 434   "&& 1"
 435   [(set (match_dup 2)
 436         (vec_select:<MODE>
 437           (match_dup 1)
 438           (parallel [(const_int 1) (const_int 0)])))
 439    (set (match_dup 0)
 440         (vec_select:<MODE>
 441           (match_dup 2)
 442           (parallel [(const_int 1) (const_int 0)])))]
 443 {
 444   rtx mem = operands[1];
 445
 446   /* Don't apply the swap optimization if we've already performed register
 447      allocation and the hard register destination is not in the altivec
 448      range.  */
 449   if ((MEM_ALIGN (mem) >= 128)
 450       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 451           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 452     {
 453       rtx mem_address = XEXP (mem, 0);
 454       enum machine_mode mode = GET_MODE (mem);
 455
 456       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 457         {
 458           /* Replace the source memory address with masked address.  */
 459           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 460           emit_insn (lvx_set_expr);
 461           DONE;
 462         }
 463       else if (rs6000_quadword_masked_address_p (mem_address))
 464         {
 465           /* This rtl is already in the form that matches lvx
 466              instruction, so leave it alone.  */
 467           DONE;
 468         }
 469       /* Otherwise, fall through to transform into a swapping load.  */
 470     }
       /* The intermediate value goes in a new pseudo when one can still be
          created; otherwise the destination register doubles as the
          temporary.  */
 471   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 472                                        : operands[0];
 473 }
 474   [(set_attr "type" "vecload")
 475    (set_attr "length" "8")])
 476
     ;; Load of a four-element (VSX_W) vector on little endian VSX targets when
     ;; TARGET_P9_VECTOR is off.  The "#" template and the "&& 1" split condition
     ;; mean the insn is always handed to the splitter, which normally produces
     ;; two doubleword swaps: memory into operand 2, then operand 2 into
     ;; operand 0.
 477 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 478   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 479         (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
 480   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 481   "#"
 482   "&& 1"
 483   [(set (match_dup 2)
 484         (vec_select:<MODE>
 485           (match_dup 1)
 486           (parallel [(const_int 2) (const_int 3)
 487                      (const_int 0) (const_int 1)])))
 488    (set (match_dup 0)
 489         (vec_select:<MODE>
 490           (match_dup 2)
 491           (parallel [(const_int 2) (const_int 3)
 492                      (const_int 0) (const_int 1)])))]
 493 {
 494   rtx mem = operands[1];
 495
 496   /* Don't apply the swap optimization if we've already performed register
 497      allocation and the hard register destination is not in the altivec
 498      range.  */
       /* Use reg_or_subregno rather than REGNO so that a SUBREG destination is
          handled too; REGNO is only valid on a REG.  This matches the VSX_D
          load pattern and the LE permuted store splits.  */
 499   if ((MEM_ALIGN (mem) >= 128)
 500       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 501           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 502     {
 503       rtx mem_address = XEXP (mem, 0);
 504       enum machine_mode mode = GET_MODE (mem);
 505
 506       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 507         {
 508           /* Replace the source memory address with masked address.  */
 509           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 510           emit_insn (lvx_set_expr);
 511           DONE;
 512         }
 513       else if (rs6000_quadword_masked_address_p (mem_address))
 514         {
 515           /* This rtl is already in the form that matches lvx
 516              instruction, so leave it alone.  */
 517           DONE;
 518         }
 519       /* Otherwise, fall through to transform into a swapping load.  */
 520     }
       /* The intermediate value goes in a new pseudo when one can still be
          created; otherwise the destination register doubles as the
          temporary.  */
 521   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 522                                        : operands[0];
 523 }
 524   [(set_attr "type" "vecload")
 525    (set_attr "length" "8")])
 526
     ;; Load of a V8HI vector on little endian VSX targets when TARGET_P9_VECTOR
     ;; is off.  The "#" template and the "&& 1" split condition mean the insn is
     ;; always handed to the splitter, which normally produces two swaps of the
     ;; vector halves (elements 4-7 with 0-3): memory into operand 2, then
     ;; operand 2 into operand 0.
 527 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 528   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 529         (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
 530   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 531   "#"
 532   "&& 1"
 533   [(set (match_dup 2)
 534         (vec_select:V8HI
 535           (match_dup 1)
 536           (parallel [(const_int 4) (const_int 5)
 537                      (const_int 6) (const_int 7)
 538                      (const_int 0) (const_int 1)
 539                      (const_int 2) (const_int 3)])))
 540    (set (match_dup 0)
 541         (vec_select:V8HI
 542           (match_dup 2)
 543           (parallel [(const_int 4) (const_int 5)
 544                      (const_int 6) (const_int 7)
 545                      (const_int 0) (const_int 1)
 546                      (const_int 2) (const_int 3)])))]
 547 {
 548   rtx mem = operands[1];
 549
 550   /* Don't apply the swap optimization if we've already performed register
 551      allocation and the hard register destination is not in the altivec
 552      range.  */
       /* Use reg_or_subregno rather than REGNO so that a SUBREG destination is
          handled too; REGNO is only valid on a REG.  This matches the VSX_D
          load pattern and the LE permuted store splits.  */
 553   if ((MEM_ALIGN (mem) >= 128)
 554       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 555           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 556     {
 557       rtx mem_address = XEXP (mem, 0);
 558       enum machine_mode mode = GET_MODE (mem);
 559
 560       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 561         {
 562           /* Replace the source memory address with masked address.  */
 563           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 564           emit_insn (lvx_set_expr);
 565           DONE;
 566         }
 567       else if (rs6000_quadword_masked_address_p (mem_address))
 568         {
 569           /* This rtl is already in the form that matches lvx
 570              instruction, so leave it alone.  */
 571           DONE;
 572         }
 573       /* Otherwise, fall through to transform into a swapping load.  */
 574     }
       /* The intermediate value goes in a new pseudo when one can still be
          created; otherwise the destination register doubles as the
          temporary.  */
 575   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 576                                        : operands[0];
 577 }
 578   [(set_attr "type" "vecload")
 579    (set_attr "length" "8")])
 580
     ;; Load of a V16QI vector on little endian VSX targets when TARGET_P9_VECTOR
     ;; is off.  The "#" template and the "&& 1" split condition mean the insn is
     ;; always handed to the splitter, which normally produces two swaps of the
     ;; vector halves (elements 8-15 with 0-7): memory into operand 2, then
     ;; operand 2 into operand 0.
 581 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 582   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 583         (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
 584   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 585   "#"
 586   "&& 1"
 587   [(set (match_dup 2)
 588         (vec_select:V16QI
 589           (match_dup 1)
 590           (parallel [(const_int 8) (const_int 9)
 591                      (const_int 10) (const_int 11)
 592                      (const_int 12) (const_int 13)
 593                      (const_int 14) (const_int 15)
 594                      (const_int 0) (const_int 1)
 595                      (const_int 2) (const_int 3)
 596                      (const_int 4) (const_int 5)
 597                      (const_int 6) (const_int 7)])))
 598    (set (match_dup 0)
 599         (vec_select:V16QI
 600           (match_dup 2)
 601           (parallel [(const_int 8) (const_int 9)
 602                      (const_int 10) (const_int 11)
 603                      (const_int 12) (const_int 13)
 604                      (const_int 14) (const_int 15)
 605                      (const_int 0) (const_int 1)
 606                      (const_int 2) (const_int 3)
 607                      (const_int 4) (const_int 5)
 608                      (const_int 6) (const_int 7)])))]
 609 {
 610   rtx mem = operands[1];
 611
 612   /* Don't apply the swap optimization if we've already performed register
 613      allocation and the hard register destination is not in the altivec
 614      range.  */
       /* Use reg_or_subregno rather than REGNO so that a SUBREG destination is
          handled too; REGNO is only valid on a REG.  This matches the VSX_D
          load pattern and the LE permuted store splits.  */
 615   if ((MEM_ALIGN (mem) >= 128)
 616       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 617           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 618     {
 619       rtx mem_address = XEXP (mem, 0);
 620       enum machine_mode mode = GET_MODE (mem);
 621
 622       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 623         {
 624           /* Replace the source memory address with masked address.  */
 625           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 626           emit_insn (lvx_set_expr);
 627           DONE;
 628         }
 629       else if (rs6000_quadword_masked_address_p (mem_address))
 630         {
 631           /* This rtl is already in the form that matches lvx
 632              instruction, so leave it alone.  */
 633           DONE;
 634         }
 635       /* Otherwise, fall through to transform into a swapping load.  */
 636     }
       /* The intermediate value goes in a new pseudo when one can still be
          created; otherwise the destination register doubles as the
          temporary.  */
 637   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 638                                        : operands[0];
 639 }
 640   [(set_attr "type" "vecload")
 641    (set_attr "length" "8")])
 642
     ;; Store of a two-element (VSX_D) vector on little endian VSX targets when
     ;; TARGET_P9_VECTOR is off.  The "#" template means the insn is not output
     ;; directly; it is split by the define_splits that follow.  Operand 1 is
     ;; read-write ("+"): the post-reload split rewrites the source register in
     ;; place.
 643 (define_insn "*vsx_le_perm_store_<mode>"
 644   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
 645         (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
 646   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 647   "#"
 648   [(set_attr "type" "vecstore")
 649    (set_attr "length" "12")])
 650
     ;; Pre-reload split (!reload_completed) of the VSX_D little endian store.
     ;; Unless the C block emits an stvx form or leaves the insn alone, the
     ;; result is two element swaps: operand 1 into operand 2, then operand 2
     ;; into memory.
 651 (define_split
 652   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
 653         (match_operand:VSX_D 1 "vsx_register_operand"))]
 654   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 655   [(set (match_dup 2)
 656         (vec_select:<MODE>
 657           (match_dup 1)
 658           (parallel [(const_int 1) (const_int 0)])))
 659    (set (match_dup 0)
 660         (vec_select:<MODE>
 661           (match_dup 2)
 662           (parallel [(const_int 1) (const_int 0)])))]
 663 {
 664   rtx mem = operands[0];
 665
 666   /* Don't apply the swap optimization if we've already performed register
 667      allocation and the hard register source is not in the altivec range.  */
 668   if ((MEM_ALIGN (mem) >= 128)
 669       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 670           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 671     {
 672       rtx mem_address = XEXP (mem, 0);
 673       enum machine_mode mode = GET_MODE (mem);
 674       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 675         {
 676           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 677           emit_insn (stvx_set_expr);
 678           DONE;
 679         }
 680       else if (rs6000_quadword_masked_address_p (mem_address))
 681         {
 682           /* This rtl is already in the form that matches stvx instruction,
 683              so leave it alone.  */
 684           DONE;
 685         }
 686       /* Otherwise, fall through to transform into a swapping store.  */
 687     }
 688
       /* The swapped copy goes in a new pseudo when one can still be created;
          otherwise the source register itself is used.  */
 689   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 690                                        : operands[1];
 691 })
 692
 693 ;; The post-reload split requires that we re-permute the source
 694 ;; register in case it is still live.
     ;; The three sets are: swap operand 1 in place, store operand 1 through a
     ;; swapping vec_select, then swap operand 1 in place again.
 695 (define_split
 696   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
 697         (match_operand:VSX_D 1 "vsx_register_operand"))]
 698   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 699   [(set (match_dup 1)
 700         (vec_select:<MODE>
 701           (match_dup 1)
 702           (parallel [(const_int 1) (const_int 0)])))
 703    (set (match_dup 0)
 704         (vec_select:<MODE>
 705           (match_dup 1)
 706           (parallel [(const_int 1) (const_int 0)])))
 707    (set (match_dup 1)
 708         (vec_select:<MODE>
 709           (match_dup 1)
 710           (parallel [(const_int 1) (const_int 0)])))]
 711   "")
 712
     ;; Store of a four-element (VSX_W) vector on little endian VSX targets when
     ;; TARGET_P9_VECTOR is off.  The "#" template means the insn is not output
     ;; directly; it is split by the define_splits that follow.  Operand 1 is
     ;; read-write ("+"): the post-reload split rewrites the source register in
     ;; place.
 713 (define_insn "*vsx_le_perm_store_<mode>"
 714   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
 715         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
 716   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 717   "#"
 718   [(set_attr "type" "vecstore")
 719    (set_attr "length" "12")])
 720
     ;; Pre-reload split (!reload_completed) of the VSX_W little endian store.
     ;; Unless the C block emits an stvx form or leaves the insn alone, the
     ;; result is two doubleword swaps: operand 1 into operand 2, then operand 2
     ;; into memory.
 721 (define_split
 722   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
 723         (match_operand:VSX_W 1 "vsx_register_operand"))]
 724   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 725   [(set (match_dup 2)
 726         (vec_select:<MODE>
 727           (match_dup 1)
 728           (parallel [(const_int 2) (const_int 3)
 729                      (const_int 0) (const_int 1)])))
 730    (set (match_dup 0)
 731         (vec_select:<MODE>
 732           (match_dup 2)
 733           (parallel [(const_int 2) (const_int 3)
 734                      (const_int 0) (const_int 1)])))]
 735 {
 736   rtx mem = operands[0];
 737
 738   /* Don't apply the swap optimization if we've already performed register
 739      allocation and the hard register source is not in the altivec range.  */
 740   if ((MEM_ALIGN (mem) >= 128)
 741       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 742           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 743     {
 744       rtx mem_address = XEXP (mem, 0);
 745       enum machine_mode mode = GET_MODE (mem);
 746       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 747         {
 748           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 749           emit_insn (stvx_set_expr);
 750           DONE;
 751         }
 752       else if (rs6000_quadword_masked_address_p (mem_address))
 753         {
 754           /* This rtl is already in the form that matches stvx instruction,
 755              so leave it alone.  */
 756           DONE;
 757         }
 758       /* Otherwise, fall through to transform into a swapping store.  */
 759     }
 760
       /* The swapped copy goes in a new pseudo when one can still be created;
          otherwise the source register itself is used.  */
 761   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 762                                        : operands[1];
 763 })
 764
 765 ;; The post-reload split requires that we re-permute the source
 766 ;; register in case it is still live.
     ;; The three sets are: swap operand 1 in place, store operand 1 through a
     ;; swapping vec_select, then swap operand 1 in place again.
 767 (define_split
 768   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
 769         (match_operand:VSX_W 1 "vsx_register_operand"))]
 770   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 771   [(set (match_dup 1)
 772         (vec_select:<MODE>
 773           (match_dup 1)
 774           (parallel [(const_int 2) (const_int 3)
 775                      (const_int 0) (const_int 1)])))
 776    (set (match_dup 0)
 777         (vec_select:<MODE>
 778           (match_dup 1)
 779           (parallel [(const_int 2) (const_int 3)
 780                      (const_int 0) (const_int 1)])))
 781    (set (match_dup 1)
 782         (vec_select:<MODE>
 783           (match_dup 1)
 784           (parallel [(const_int 2) (const_int 3)
 785                      (const_int 0) (const_int 1)])))]
 786   "")
 787
 788 (define_insn "*vsx_le_perm_store_v8hi"
     ;; Little-endian V8HI store from a VSX register when TARGET_P9_VECTOR is
     ;; not set.  The output template is "#", so this insn is never printed
     ;; directly and relies on the V8HI define_splits in this file.  The
     ;; length of 12 matches the three insns produced by the post-reload split.
 789   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
 790         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 791   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 792   "#"
 793   [(set_attr "type" "vecstore")
 794    (set_attr "length" "12")])
 795
 796 (define_split
     ;; Pre-reload split of a little-endian V8HI store.  By default the store
     ;; becomes a doubleword swap of operand 1 into operand 2 followed by a
     ;; swapping store of operand 2.  For memory aligned to at least 128 bits
     ;; the preparation code below may instead emit an stvx, or finish without
     ;; emitting anything.
 797   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
 798         (match_operand:V8HI 1 "vsx_register_operand"))]
 799   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 800   [(set (match_dup 2)
 801         (vec_select:V8HI
 802           (match_dup 1)
 803           (parallel [(const_int 4) (const_int 5)
 804                      (const_int 6) (const_int 7)
 805                      (const_int 0) (const_int 1)
 806                      (const_int 2) (const_int 3)])))
 807    (set (match_dup 0)
 808         (vec_select:V8HI
 809           (match_dup 2)
 810           (parallel [(const_int 4) (const_int 5)
 811                      (const_int 6) (const_int 7)
 812                      (const_int 0) (const_int 1)
 813                      (const_int 2) (const_int 3)])))]
 814 {
 815   rtx mem = operands[0];
 816
 817   /* Don't apply the swap optimization if we've already performed register
 818      allocation and the hard register source is not in the altivec range.  */
 819   if ((MEM_ALIGN (mem) >= 128)
 820       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 821           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 822     {
 823       rtx mem_address = XEXP (mem, 0);
 824       enum machine_mode mode = GET_MODE (mem);
 825       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 826         {
     	  /* Register or register+register address: store with stvx and
     	     skip the two-swap sequence entirely.  */
 827           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 828           emit_insn (stvx_set_expr);
 829           DONE;
 830         }
 831       else if (rs6000_quadword_masked_address_p (mem_address))
 832         {
 833           /* This rtl is already in the form that matches stvx instruction,
 834              so leave it alone.  */
 835           DONE;
 836         }
 837       /* Otherwise, fall through to transform into a swapping store.  */
 838     }
 839
     /* Operand 2 receives the swapped value: a new pseudo modelled on the
        source register when one can be created, otherwise the source
        register itself.  */
 840   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 841                                        : operands[1];
 842 })
 843
 844 ;; The post-reload split requires that we re-permute the source
 845 ;; register in case it is still live.
     ;; Three insns result: swap operand 1 in place, store it through a second
     ;; doubleword swap, then swap operand 1 again to restore its contents.
 846 (define_split
 847   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
 848         (match_operand:V8HI 1 "vsx_register_operand"))]
 849   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 850   [(set (match_dup 1)
 851         (vec_select:V8HI
 852           (match_dup 1)
 853           (parallel [(const_int 4) (const_int 5)
 854                      (const_int 6) (const_int 7)
 855                      (const_int 0) (const_int 1)
 856                      (const_int 2) (const_int 3)])))
 857    (set (match_dup 0)
 858         (vec_select:V8HI
 859           (match_dup 1)
 860           (parallel [(const_int 4) (const_int 5)
 861                      (const_int 6) (const_int 7)
 862                      (const_int 0) (const_int 1)
 863                      (const_int 2) (const_int 3)])))
 864    (set (match_dup 1)
 865         (vec_select:V8HI
 866           (match_dup 1)
 867           (parallel [(const_int 4) (const_int 5)
 868                      (const_int 6) (const_int 7)
 869                      (const_int 0) (const_int 1)
 870                      (const_int 2) (const_int 3)])))]
 871   "")
 872
 873 (define_insn "*vsx_le_perm_store_v16qi"
     ;; Little-endian V16QI store from a VSX register when TARGET_P9_VECTOR is
     ;; not set.  The output template is "#", so this insn is never printed
     ;; directly and relies on the V16QI define_splits in this file.  The
     ;; length of 12 matches the three insns produced by the post-reload split.
 874   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
 875         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 876   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 877   "#"
 878   [(set_attr "type" "vecstore")
 879    (set_attr "length" "12")])
 880
 881 (define_split
     ;; Pre-reload split of a little-endian V16QI store.  By default the store
     ;; becomes a doubleword swap of operand 1 into operand 2 followed by a
     ;; swapping store of operand 2.  For memory aligned to at least 128 bits
     ;; the preparation code below may instead emit an stvx, or finish without
     ;; emitting anything.
 882   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
 883         (match_operand:V16QI 1 "vsx_register_operand"))]
 884   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 885   [(set (match_dup 2)
 886         (vec_select:V16QI
 887           (match_dup 1)
 888           (parallel [(const_int 8) (const_int 9)
 889                      (const_int 10) (const_int 11)
 890                      (const_int 12) (const_int 13)
 891                      (const_int 14) (const_int 15)
 892                      (const_int 0) (const_int 1)
 893                      (const_int 2) (const_int 3)
 894                      (const_int 4) (const_int 5)
 895                      (const_int 6) (const_int 7)])))
 896    (set (match_dup 0)
 897         (vec_select:V16QI
 898           (match_dup 2)
 899           (parallel [(const_int 8) (const_int 9)
 900                      (const_int 10) (const_int 11)
 901                      (const_int 12) (const_int 13)
 902                      (const_int 14) (const_int 15)
 903                      (const_int 0) (const_int 1)
 904                      (const_int 2) (const_int 3)
 905                      (const_int 4) (const_int 5)
 906                      (const_int 6) (const_int 7)])))]
 907 {
 908   rtx mem = operands[0];
 909
 910   /* Don't apply the swap optimization if we've already performed register
 911      allocation and the hard register source is not in the altivec range.  */
 912   if ((MEM_ALIGN (mem) >= 128)
 913       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 914           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 915     {
 916       rtx mem_address = XEXP (mem, 0);
 917       enum machine_mode mode = GET_MODE (mem);
 918       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 919         {
     	  /* Register or register+register address: store with stvx and
     	     skip the two-swap sequence entirely.  */
 920           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 921           emit_insn (stvx_set_expr);
 922           DONE;
 923         }
 924       else if (rs6000_quadword_masked_address_p (mem_address))
 925         {
 926           /* This rtl is already in the form that matches stvx instruction,
 927              so leave it alone.  */
 928           DONE;
 929         }
 930       /* Otherwise, fall through to transform into a swapping store.  */
 931     }
 932
     /* Operand 2 receives the swapped value: a new pseudo modelled on the
        source register when one can be created, otherwise the source
        register itself.  */
 933   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 934                                        : operands[1];
 935 })
 936
 937 ;; The post-reload split requires that we re-permute the source
 938 ;; register in case it is still live.
     ;; Three insns result: swap operand 1 in place, store it through a second
     ;; doubleword swap, then swap operand 1 again to restore its contents.
 939 (define_split
 940   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
 941         (match_operand:V16QI 1 "vsx_register_operand"))]
 942   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 943   [(set (match_dup 1)
 944         (vec_select:V16QI
 945           (match_dup 1)
 946           (parallel [(const_int 8) (const_int 9)
 947                      (const_int 10) (const_int 11)
 948                      (const_int 12) (const_int 13)
 949                      (const_int 14) (const_int 15)
 950                      (const_int 0) (const_int 1)
 951                      (const_int 2) (const_int 3)
 952                      (const_int 4) (const_int 5)
 953                      (const_int 6) (const_int 7)])))
 954    (set (match_dup 0)
 955         (vec_select:V16QI
 956           (match_dup 1)
 957           (parallel [(const_int 8) (const_int 9)
 958                      (const_int 10) (const_int 11)
 959                      (const_int 12) (const_int 13)
 960                      (const_int 14) (const_int 15)
 961                      (const_int 0) (const_int 1)
 962                      (const_int 2) (const_int 3)
 963                      (const_int 4) (const_int 5)
 964                      (const_int 6) (const_int 7)])))
 965    (set (match_dup 1)
 966         (vec_select:V16QI
 967           (match_dup 1)
 968           (parallel [(const_int 8) (const_int 9)
 969                      (const_int 10) (const_int 11)
 970                      (const_int 12) (const_int 13)
 971                      (const_int 14) (const_int 15)
 972                      (const_int 0) (const_int 1)
 973                      (const_int 2) (const_int 3)
 974                      (const_int 4) (const_int 5)
 975                      (const_int 6) (const_int 7)])))]
 976   "")
 977
 978 ;; Little endian swapping of the two 64-bit halves of 128-bit types that are
 979 ;; either scalars or the special V1TI container class, for which it is not
 980 ;; appropriate to use vec_select; the swap is expressed as a rotate by 64.
     ;; Alternatives, in order: VSX register swap, VSX load, VSX store,
     ;; GPR register-pair swap, GPR load and GPR store.
 981 (define_insn "*vsx_le_permute_<mode>"
 982   [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
 983         (rotate:VSX_TI
 984          (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
 985          (const_int 64)))]
 986   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 987   "@
 988    xxpermdi %x0,%x1,%x1,2
 989    lxvd2x %x0,%y1
 990    stxvd2x %x1,%y0
 991    mr %0,%L1\;mr %L0,%1
 992    ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
 993    std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
 994   [(set_attr "length" "4,4,4,8,8,8")
 995    (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
 996
 997 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
     ;; Two nested rotates by 64 bits of a 128-bit value cancel each other, so
     ;; this pattern is split into a plain copy of operand 1 to operand 0.
     ;; When both operands are tied (first alternative) nothing is output.
 998   [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
 999         (rotate:VSX_TI
1000          (rotate:VSX_TI
1001           (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
1002           (const_int 64))
1003          (const_int 64)))]
1004   "!BYTES_BIG_ENDIAN && TARGET_VSX"
1005   "@
1006    #
1007    xxlor %x0,%x1"
1008   ""
1009   [(set (match_dup 0) (match_dup 1))]
1010 {
     /* After reload a copy of a register onto itself is replaced by a
        deleted-insn note instead of a real move.  */
1011   if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
1012     {
1013       emit_note (NOTE_INSN_DELETED);
1014       DONE;
1015     }
1016 }
1017   [(set_attr "length" "0,4")
1018    (set_attr "type" "veclogical")])
1019
1020 (define_insn_and_split "*vsx_le_perm_load_<mode>"
     ;; Little-endian load of a 128-bit VSX_LE_128 value into a VSX register
     ;; (first alternative) or GPRs (second alternative).  Both alternatives
     ;; are "#" and are split into two permutes emitted through
     ;; rs6000_emit_le_vsx_permute: memory to a temporary, then the temporary
     ;; to operand 0.
1021   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
1022         (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1023   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1024   "@
1025    #
1026    #"
1027   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1028   [(const_int 0)]
1029 {
     /* The temporary is a new pseudo modelled on the destination register
        when one can be created, otherwise the destination itself.  */
1030   rtx tmp = (can_create_pseudo_p ()
1031              ? gen_reg_rtx_and_attrs (operands[0])
1032              : operands[0]);
1033   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1034   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1035   DONE;
1036 }
1037   [(set_attr "type" "vecload,load")
1038    (set_attr "length" "8,8")])
1039
1040 (define_insn "*vsx_le_perm_store_<mode>"
     ;; Little-endian store of a 128-bit VSX_LE_128 value from a VSX register
     ;; (first alternative) or GPRs (second alternative).  Both alternatives
     ;; are "#", so the insn is never printed directly and relies on the
     ;; VSX_LE_128 store define_splits in this file.
1041   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1042         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
1043   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1044   "@
1045    #
1046    #"
1047   [(set_attr "type" "vecstore,store")
1048    (set_attr "length" "12,8")])
1049
1050 (define_split
     ;; Pre-reload split of a little-endian 128-bit store: permute the source
     ;; register into a temporary register, then permute the temporary into
     ;; memory.  Both steps are emitted through rs6000_emit_le_vsx_permute.
1051   [(set (match_operand:VSX_LE_128 0 "memory_operand")
1052         (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1053   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
1054   [(const_int 0)]
1055 {
     /* The temporary must be a register, so model it on the source register
        (operand 1) rather than on the memory destination (operand 0).  When
        no pseudo can be created, permute the source register in place, as
        the other little-endian store splits in this file do.  */
1056   rtx tmp = (can_create_pseudo_p ()
1057              ? gen_reg_rtx_and_attrs (operands[1])
1058              : operands[1]);
1059   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1060   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1061   DONE;
1062 })
1063
1064 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1065 ;; GPR registers on a little endian system.
     ;; This one replaces a rotating load into operand 0 followed by a rotate of
     ;; operand 0 into operand 2 with a single plain load into operand 2.  It
     ;; applies only when operand 0 is operand 2 itself or is dead after the
     ;; second insn.
1066 (define_peephole2
1067   [(set (match_operand:VSX_TI 0 "int_reg_operand")
1068         (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1069                        (const_int 64)))
1070    (set (match_operand:VSX_TI 2 "int_reg_operand")
1071         (rotate:VSX_TI (match_dup 0)
1072                        (const_int 64)))]
1073   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1074    && (rtx_equal_p (operands[0], operands[2])
1075        || peep2_reg_dead_p (2, operands[0]))"
1076    [(set (match_dup 2) (match_dup 1))])
1077
1078 (define_peephole2
     ;; Store counterpart of the GPR load peephole: a rotate of integer
     ;; register operand 1 into operand 0 followed by a rotating store of
     ;; operand 0 becomes a single plain store of operand 1, provided the
     ;; intermediate register operand 0 is dead after the second insn.
1079   [(set (match_operand:VSX_TI 0 "int_reg_operand")
1080         (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1081                        (const_int 64)))
1082    (set (match_operand:VSX_TI 2 "memory_operand")
1083         (rotate:VSX_TI (match_dup 0)
1084                        (const_int 64)))]
1085   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1086    && peep2_reg_dead_p (2, operands[0])"
1087    [(set (match_dup 2) (match_dup 1))])
1088
1089 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1090 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
1091 ;; floating point are handled by the more generic swap elimination pass.
     ;; As written, all three operands use the vsx_register_operand predicate:
     ;; two back-to-back rotates by 64 through operand 0 are replaced by a plain
     ;; copy of operand 1 to operand 2 when operand 0 is operand 2 itself or is
     ;; dead after the second insn.
1092 (define_peephole2
1093   [(set (match_operand:TI 0 "vsx_register_operand")
1094         (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1095                    (const_int 64)))
1096    (set (match_operand:TI 2 "vsx_register_operand")
1097         (rotate:TI (match_dup 0)
1098                    (const_int 64)))]
1099   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1100    && (rtx_equal_p (operands[0], operands[2])
1101        || peep2_reg_dead_p (2, operands[0]))"
1102    [(set (match_dup 2) (match_dup 1))])
1103
1104 ;; The post-reload split requires that we re-permute the source
1105 ;; register in case it is still live.
     ;; No temporary is used: operand 1 is permuted in place, stored through a
     ;; second permute, and permuted a third time to restore its contents.
1106 (define_split
1107   [(set (match_operand:VSX_LE_128 0 "memory_operand")
1108         (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1109   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1110   [(const_int 0)]
1111 {
1112   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1113   rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1114   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1115   DONE;
1116 })
1117
1118 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
1119 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
     ;; This pattern handles the vec_duplicate form for V16QI: the SImode
     ;; constant in operand 1 is replicated into every byte of operand 0.
1120 (define_insn "xxspltib_v16qi"
1121   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1122         (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1123   "TARGET_P9_VECTOR"
1124 {
     /* Only the low 8 bits of the constant are printed as the immediate.  */
1125   operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1126   return "xxspltib %x0,%2";
1127 }
1128   [(set_attr "type" "vecperm")])
1129
1130 (define_insn "xxspltib_<mode>_nosplit"
     ;; Load a VSINT_842 vector constant with a single XXSPLTIB.  The constant
     ;; in operand 1 is decoded with xxspltib_constant_p, which must report
     ;; exactly one instruction here.
1131   [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1132         (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1133   "TARGET_P9_VECTOR"
1134 {
1135   int insn_count = -1;
1136   int splat_byte = 256;
1137   bool splat_ok = xxspltib_constant_p (operands[1], <MODE>mode,
1138                                        &insn_count, &splat_byte);
1139
1140   /* Anything other than a one-insn splat must not reach this pattern.  */
1141   if (!splat_ok || insn_count != 1)
1142     gcc_unreachable ();
1143
     /* Only the low 8 bits of the value are printed as the immediate.  */
1144   operands[2] = GEN_INT (splat_byte & 0xff);
1145   return "xxspltib %x0,%2";
1146 }
1147   [(set_attr "type" "vecperm")])
1147
1148 (define_insn_and_split "*xxspltib_<mode>_split"
     ;; Load a VSINT_842 vector constant that needs two instructions: an
     ;; XXSPLTIB into a V16QI temporary followed by a widening of the bytes
     ;; to the element size of <MODE>.  Always split ("#" with "&& 1").
1149   [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1150         (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1151   "TARGET_P9_VECTOR"
1152   "#"
1153   "&& 1"
1154   [(const_int 0)]
1155 {
1156   rtx dest = operands[0];
1157   rtx byte_vec;
1158   int splat_byte = 256;
1159   int insn_count = -1;
1160
     /* Use a fresh V16QI pseudo when possible, otherwise view the
        destination itself as V16QI.  */
1161   if (can_create_pseudo_p ())
1162     byte_vec = gen_reg_rtx (V16QImode);
1163   else
1164     byte_vec = gen_lowpart (V16QImode, dest);
1165
1166   bool splat_ok = xxspltib_constant_p (operands[1], <MODE>mode,
1167                                        &insn_count, &splat_byte);
1168   /* Anything other than a two-insn sequence must not reach this split.  */
1169   if (!splat_ok || insn_count != 2)
1170     gcc_unreachable ();
1171
1172   emit_insn (gen_xxspltib_v16qi (byte_vec, GEN_INT (splat_byte)));
1173
     /* Pick the widening insn that matches the element size of <MODE>.  */
1174   rtx widen;
1175   if (<MODE>mode == V2DImode)
1176     widen = gen_vsx_sign_extend_qi_v2di (dest, byte_vec);
1177   else if (<MODE>mode == V4SImode)
1178     widen = gen_vsx_sign_extend_qi_v4si (dest, byte_vec);
1179   else if (<MODE>mode == V8HImode)
1180     widen = gen_altivec_vupkhsb (dest, byte_vec);
1181   else
1182     gcc_unreachable ();
1183
     emit_insn (widen);
1184   DONE;
1185 }
1186   [(set_attr "type" "vecperm")
1187    (set_attr "length" "8")])
1186
1187
1188 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
1189 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1190 ;; all 1's, since the machine does not have to wait for the previous
1191 ;; instruction using the register being set (such as a store waiting on a slow
1192 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1193
1194 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    STQ (GPR)
1195 ;;              LQ (GPR)   GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
1196 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  STVX (VMX)  LVX (VMX)
     ;; The legend above names the alternatives in constraint order.  The sixth
     ;; alternative stores GPRs to wQ memory and the seventh loads GPRs from wQ
     ;; memory, so their "type" values are store and load respectively.
     ;; NOTE(review): confirm that the mffgpr/mftgpr types on the fourth
     ;; (r <- we) and fifth (we <- r) alternatives are the intended way round.
1197 (define_insn "vsx_mov<mode>_64bit"
1198   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1199                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
1200                 ?&r,       ??r,       ??Y,       <??r>,     wo,        v,
1201                 ?<VSa>,    *r,        v,         ??r,       wZ,        v")
1202
1203         (match_operand:VSX_M 1 "input_operand"
1204                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
1205                 wQ,        Y,         r,         r,         wE,        jwM,
1206                 ?jwM,      jwM,       W,         W,         v,         wZ"))]
1207
1208   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1209    && (register_operand (operands[0], <MODE>mode)
1210        || register_operand (operands[1], <MODE>mode))"
1211 {
1212   return rs6000_output_move_128bit (operands);
1213 }
1214   [(set_attr "type"
1215                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    store,
1216                 load,      load,      store,     *,         vecsimple, vecsimple,
1217                 vecsimple, *,         *,         *,         vecstore,  vecload")
1218
1219    (set_attr "length"
1220                "4,         4,         4,         8,         4,         8,
1221                 8,         8,         8,         8,         4,         4,
1222                 4,         8,         20,        20,        4,         4")])
1223
1224 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
1225 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
1226 ;;              STVX (VMX) LVX (VMX)
     ;; The legend above names the alternatives in constraint order; the last
     ;; two are the wZ-memory store from and load into an Altivec register,
     ;; matching their vecstore and vecload types.  The assembler text for
     ;; every alternative comes from rs6000_output_move_128bit.
1227 (define_insn "*vsx_mov<mode>_32bit"
1228   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1229                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
1230                 wo,        v,         ?<VSa>,    *r,        v,         ??r,
1231                 wZ,        v")
1232
1233         (match_operand:VSX_M 1 "input_operand"
1234                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
1235                 wE,        jwM,       ?jwM,      jwM,       W,         W,
1236                 v,         wZ"))]
1237
1238   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1239    && (register_operand (operands[0], <MODE>mode)
1240        || register_operand (operands[1], <MODE>mode))"
1241 {
1242   return rs6000_output_move_128bit (operands);
1243 }
1244   [(set_attr "type"
1245                "vecstore,  vecload,   vecsimple, load,      store,    *,
1246                 vecsimple, vecsimple, vecsimple, *,         *,        *,
1247                 vecstore,  vecload")
1248
1249    (set_attr "length"
1250                "4,         4,         4,         16,        16,        16,
1251                 4,         4,         4,         16,        20,        32,
1252                 4,         4")])
1253
1254 ;; Explicit load/store expanders for the builtin functions.
     ;; The load expander emits the plain move pattern below, except on little
     ;; endian without TARGET_P9_VECTOR, where rs6000_emit_le_vsx_move
     ;; generates the sequence instead.
1255 (define_expand "vsx_load_<mode>"
1256   [(set (match_operand:VSX_M 0 "vsx_register_operand")
1257         (match_operand:VSX_M 1 "memory_operand"))]
1258   "VECTOR_MEM_VSX_P (<MODE>mode)"
1259 {
1260   /* Expand to swaps if needed, prior to swap optimization.  */
1261   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1262     {
1263       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1264       DONE;
1265     }
1266 })
1267
1268 (define_expand "vsx_store_<mode>"
     ;; Explicit store expander for the builtin functions.  It emits the plain
     ;; move pattern below, except on little endian without TARGET_P9_VECTOR,
     ;; where rs6000_emit_le_vsx_move generates the sequence instead.
1269   [(set (match_operand:VSX_M 0 "memory_operand")
1270         (match_operand:VSX_M 1 "vsx_register_operand"))]
1271   "VECTOR_MEM_VSX_P (<MODE>mode)"
1272 {
1273   /* Expand to swaps if needed, prior to swap optimization.  */
1274   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1275     {
1276       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1277       DONE;
1278     }
1279 })
1280
1281 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1282 ;; when you really want their element-reversing behavior.
     ;; V2DI load: the two doublewords are selected in reverse order (1, 0),
     ;; which on little endian is output as a single lxvd2x.
1283 (define_insn "vsx_ld_elemrev_v2di"
1284   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1285         (vec_select:V2DI
1286           (match_operand:V2DI 1 "memory_operand" "Z")
1287           (parallel [(const_int 1) (const_int 0)])))]
1288   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1289   "lxvd2x %x0,%y1"
1290   [(set_attr "type" "vecload")])
1291
1292 (define_insn "vsx_ld_elemrev_v1ti"
     ;; V1TI element-reversing load.  With a single 128-bit element the
     ;; selection is the identity, so the lxvd2x is followed by an xxpermdi
     ;; that swaps the two doublewords of the result.  Two instructions are
     ;; output, hence the explicit length of 8 bytes.
1293   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1294         (vec_select:V1TI
1295           (match_operand:V1TI 1 "memory_operand" "Z")
1296           (parallel [(const_int 0)])))]
1297   "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1298 {
1299    return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1300 }
1301   [(set_attr "type" "vecload")
     (set_attr "length" "8")])
1302
1303 (define_insn "vsx_ld_elemrev_v2df"
     ;; V2DF element-reversing load: the two doublewords are selected in
     ;; reverse order (1, 0) and the insn is output as a single lxvd2x.
1304   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1305         (vec_select:V2DF
1306           (match_operand:V2DF 1 "memory_operand" "Z")
1307           (parallel [(const_int 1) (const_int 0)])))]
1308   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1309   "lxvd2x %x0,%y1"
1310   [(set_attr "type" "vecload")])
1311
1312 (define_insn "vsx_ld_elemrev_v4si"
     ;; V4SI element-reversing load: the four words are selected in reverse
     ;; order (3, 2, 1, 0) and the insn is output as a single lxvw4x.
1313   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1314         (vec_select:V4SI
1315           (match_operand:V4SI 1 "memory_operand" "Z")
1316           (parallel [(const_int 3) (const_int 2)
1317                      (const_int 1) (const_int 0)])))]
1318   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1319   "lxvw4x %x0,%y1"
1320   [(set_attr "type" "vecload")])
1321
1322 (define_insn "vsx_ld_elemrev_v4sf"
     ;; V4SF element-reversing load: the four words are selected in reverse
     ;; order (3, 2, 1, 0) and the insn is output as a single lxvw4x.
1323   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1324         (vec_select:V4SF
1325           (match_operand:V4SF 1 "memory_operand" "Z")
1326           (parallel [(const_int 3) (const_int 2)
1327                      (const_int 1) (const_int 0)])))]
1328   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1329   "lxvw4x %x0,%y1"
1330   [(set_attr "type" "vecload")])
1331
1332 (define_expand "vsx_ld_elemrev_v8hi"
     ;; V8HI element-reversing load.  With TARGET_P9_VECTOR the pattern below
     ;; is emitted unchanged (see *vsx_ld_elemrev_v8hi_internal).  Otherwise
     ;; the preparation code loads the memory as V4SI with
     ;; vsx_ld_elemrev_v4si and then rearranges the bytes with a vperm.
1333   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1334         (vec_select:V8HI
1335           (match_operand:V8HI 1 "memory_operand" "Z")
1336           (parallel [(const_int 7) (const_int 6)
1337                      (const_int 5) (const_int 4)
1338                      (const_int 3) (const_int 2)
1339                      (const_int 1) (const_int 0)])))]
1340   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1341 {
1342   if (!TARGET_P9_VECTOR)
1343     {
1344       rtx tmp = gen_reg_rtx (V4SImode);
1345       rtx subreg, subreg2, perm[16], pcv;
1346       /* 2 is leftmost element in register */
1347       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1348       int i;
1349
     	/* View the V8HI memory as V4SI, load it word-reversed into TMP and
     	   view TMP as V8HI again.  */
1350       subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1351       emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1352       subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1353
     	/* Build the 16-byte permute control vector from REORDER.  */
1354       for (i = 0; i < 16; ++i)
1355         perm[i] = GEN_INT (reorder[i]);
1356
1357       pcv = force_reg (V16QImode,
1358                        gen_rtx_CONST_VECTOR (V16QImode,
1359                                              gen_rtvec_v (16, perm)));
1360       emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1361                                                 subreg2, pcv));
1362       DONE;
1363     }
1364 })
1365
1366 (define_insn "*vsx_ld_elemrev_v8hi_internal"
     ;; V8HI element-reversing load when TARGET_P9_VECTOR is set: the eight
     ;; halfwords are selected in reverse order and output as a single lxvh8x.
1367   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1368         (vec_select:V8HI
1369           (match_operand:V8HI 1 "memory_operand" "Z")
1370           (parallel [(const_int 7) (const_int 6)
1371                      (const_int 5) (const_int 4)
1372                      (const_int 3) (const_int 2)
1373                      (const_int 1) (const_int 0)])))]
1374   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1375   "lxvh8x %x0,%y1"
1376   [(set_attr "type" "vecload")])
1377
1378 (define_expand "vsx_ld_elemrev_v16qi"
     ;; V16QI element-reversing load.  With TARGET_P9_VECTOR the pattern below
     ;; is emitted unchanged (see *vsx_ld_elemrev_v16qi_internal).  Otherwise
     ;; the preparation code loads the memory as V4SI with
     ;; vsx_ld_elemrev_v4si and then rearranges the bytes with a vperm.
1379   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1380         (vec_select:V16QI
1381           (match_operand:V16QI 1 "memory_operand" "Z")
1382           (parallel [(const_int 15) (const_int 14)
1383                      (const_int 13) (const_int 12)
1384                      (const_int 11) (const_int 10)
1385                      (const_int  9) (const_int  8)
1386                      (const_int  7) (const_int  6)
1387                      (const_int  5) (const_int  4)
1388                      (const_int  3) (const_int  2)
1389                      (const_int  1) (const_int  0)])))]
1390   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1391 {
1392   if (!TARGET_P9_VECTOR)
1393     {
1394       rtx tmp = gen_reg_rtx (V4SImode);
1395       rtx subreg, subreg2, perm[16], pcv;
1396       /* 3 is leftmost element in register */
1397       unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1398       int i;
1399
     	/* View the V16QI memory as V4SI, load it word-reversed into TMP and
     	   view TMP as V16QI again.  */
1400       subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1401       emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1402       subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1403
     	/* Build the 16-byte permute control vector from REORDER.  */
1404       for (i = 0; i < 16; ++i)
1405         perm[i] = GEN_INT (reorder[i]);
1406
1407       pcv = force_reg (V16QImode,
1408                        gen_rtx_CONST_VECTOR (V16QImode,
1409                                              gen_rtvec_v (16, perm)));
1410       emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1411                                                  subreg2, pcv));
1412       DONE;
1413     }
1414 })
1415
1416 (define_insn "*vsx_ld_elemrev_v16qi_internal"
     ;; V16QI element-reversing load when TARGET_P9_VECTOR is set: the sixteen
     ;; bytes are selected in reverse order and output as a single lxvb16x.
1417   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1418         (vec_select:V16QI
1419           (match_operand:V16QI 1 "memory_operand" "Z")
1420           (parallel [(const_int 15) (const_int 14)
1421                      (const_int 13) (const_int 12)
1422                      (const_int 11) (const_int 10)
1423                      (const_int  9) (const_int  8)
1424                      (const_int  7) (const_int  6)
1425                      (const_int  5) (const_int  4)
1426                      (const_int  3) (const_int  2)
1427                      (const_int  1) (const_int  0)])))]
1428   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1429   "lxvb16x %x0,%y1"
1430   [(set_attr "type" "vecload")])
1431
1432 (define_insn "vsx_st_elemrev_v1ti"
     ;; V1TI element-reversing store.  The source register is doubleword
     ;; swapped in place with xxpermdi and then stored with stxvd2x, which is
     ;; why operand 1 is both read/write ("+wa") and clobbered.  Two
     ;; instructions are output, hence the explicit length of 8 bytes.
     ;; NOTE(review): the condition tests V2DImode whereas vsx_ld_elemrev_v1ti
     ;; tests V1TImode -- confirm which mode is intended here.
1433   [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1434         (vec_select:V1TI
1435           (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1436           (parallel [(const_int 0)])))
1437    (clobber (match_dup 1))]
1438   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1439 {
1440   return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1441 }
1442   [(set_attr "type" "vecstore")
     (set_attr "length" "8")])
1443
1444 (define_insn "vsx_st_elemrev_v2df"
     ;; V2DF element-reversing store: the two doublewords are selected in
     ;; reverse order (1, 0) and the insn is output as a single stxvd2x.
1445   [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1446         (vec_select:V2DF
1447           (match_operand:V2DF 1 "vsx_register_operand" "wa")
1448           (parallel [(const_int 1) (const_int 0)])))]
1449   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1450   "stxvd2x %x1,%y0"
1451   [(set_attr "type" "vecstore")])
1452
1453 (define_insn "vsx_st_elemrev_v2di"
     ;; V2DI element-reversing store: the two doublewords are selected in
     ;; reverse order (1, 0) and the insn is output as a single stxvd2x.
1454   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1455         (vec_select:V2DI
1456           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1457           (parallel [(const_int 1) (const_int 0)])))]
1458   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1459   "stxvd2x %x1,%y0"
1460   [(set_attr "type" "vecstore")])
1461
1462 (define_insn "vsx_st_elemrev_v4sf"
     ;; V4SF element-reversing store: the four words are selected in reverse
     ;; order (3, 2, 1, 0) and the insn is output as a single stxvw4x.
1463   [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1464         (vec_select:V4SF
1465           (match_operand:V4SF 1 "vsx_register_operand" "wa")
1466           (parallel [(const_int 3) (const_int 2)
1467                      (const_int 1) (const_int 0)])))]
1468   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1469   "stxvw4x %x1,%y0"
1470   [(set_attr "type" "vecstore")])
1471
1472 (define_insn "vsx_st_elemrev_v4si"
     ;; V4SI element-reversing store: the four words are selected in reverse
     ;; order (3, 2, 1, 0) and the insn is output as a single stxvw4x.
1473   [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1474         (vec_select:V4SI
1475           (match_operand:V4SI 1 "vsx_register_operand" "wa")
1476           (parallel [(const_int 3) (const_int 2)
1477                      (const_int 1) (const_int 0)])))]
1478   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1479   "stxvw4x %x1,%y0"
1480   [(set_attr "type" "vecstore")])
1481
1482 (define_expand "vsx_st_elemrev_v8hi"
     ;; V8HI element-reversing store.  With TARGET_P9_VECTOR the pattern below
     ;; is emitted unchanged (see *vsx_st_elemrev_v8hi_internal).  Otherwise
     ;; the preparation code rearranges the bytes of the source with a vperm
     ;; into a temporary and stores that as V4SI with vsx_st_elemrev_v4si.
1483   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1484         (vec_select:V8HI
1485           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1486           (parallel [(const_int 7) (const_int 6)
1487                      (const_int 5) (const_int 4)
1488                      (const_int 3) (const_int 2)
1489                      (const_int 1) (const_int 0)])))]
1490   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1491 {
1492   if (!TARGET_P9_VECTOR)
1493     {
1494       rtx mem_subreg, subreg, perm[16], pcv;
1495       rtx tmp = gen_reg_rtx (V8HImode);
1496       /* 2 is leftmost element in register */
1497       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1498       int i;
1499
     	/* Build the 16-byte permute control vector from REORDER.  */
1500       for (i = 0; i < 16; ++i)
1501         perm[i] = GEN_INT (reorder[i]);
1502
1503       pcv = force_reg (V16QImode,
1504                        gen_rtx_CONST_VECTOR (V16QImode,
1505                                              gen_rtvec_v (16, perm)));
1506       emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1507                                                 operands[1], pcv));
     	/* View both the permuted temporary and the memory destination as
     	   V4SI so the word-reversing store can be used.  */
1508       subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1509       mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1510       emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1511       DONE;
1512     }
1513 })
1514
1515 (define_insn "*vsx_st_elemrev_v2di_internal"
     ;; V2DI element-reversing store restricted to TARGET_P9_VECTOR, output as
     ;; a single stxvd2x.
     ;; NOTE(review): the RTL template and output are the same as those of
     ;; vsx_st_elemrev_v2di, whose condition is a superset of this one, so this
     ;; pattern looks redundant -- confirm before relying on it.
1516   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1517         (vec_select:V2DI
1518           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1519           (parallel [(const_int 1) (const_int 0)])))]
1520   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1521   "stxvd2x %x1,%y0"
1522   [(set_attr "type" "vecstore")])
1523
1524 (define_insn "*vsx_st_elemrev_v8hi_internal"
     ;; V8HI element-reversing store when TARGET_P9_VECTOR is set: the eight
     ;; halfwords are selected in reverse order and output as a single stxvh8x.
1525   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1526         (vec_select:V8HI
1527           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1528           (parallel [(const_int 7) (const_int 6)
1529                      (const_int 5) (const_int 4)
1530                      (const_int 3) (const_int 2)
1531                      (const_int 1) (const_int 0)])))]
1532   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1533   "stxvh8x %x1,%y0"
1534   [(set_attr "type" "vecstore")])
1535
;; Expander for an element-reversed V16QI store on little-endian VSX.
;; With TARGET_P9_VECTOR the preparation code does nothing and the RTL
;; template below is emitted unchanged.  Otherwise the preparation code
;; permutes operand 1 into a fresh V16QI temporary (altivec vperm with the
;; constant control vector built from reorder[]), reinterprets both the
;; temporary and the memory destination as V4SI via subregs, emits
;; vsx_st_elemrev_v4si on those, and finishes with DONE.
1536 (define_expand "vsx_st_elemrev_v16qi"
1537   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1538         (vec_select:V16QI
1539           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1540           (parallel [(const_int 15) (const_int 14)
1541                      (const_int 13) (const_int 12)
1542                      (const_int 11) (const_int 10)
1543                      (const_int  9) (const_int  8)
1544                      (const_int  7) (const_int  6)
1545                      (const_int  5) (const_int  4)
1546                      (const_int  3) (const_int  2)
1547                      (const_int  1) (const_int  0)])))]
1548   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1549 {
1550   if (!TARGET_P9_VECTOR)
1551     {
1552       rtx mem_subreg, subreg, perm[16], pcv;
1553       rtx tmp = gen_reg_rtx (V16QImode);
1554       /* 3 is leftmost element in register */
1555       unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1556       int i;
1557
1558       for (i = 0; i < 16; ++i)
1559         perm[i] = GEN_INT (reorder[i]);
1560
1561       pcv = force_reg (V16QImode,
1562                        gen_rtx_CONST_VECTOR (V16QImode,
1563                                              gen_rtvec_v (16, perm)));
1564       emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1565                                                  operands[1], pcv));
1566       subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1567       mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1568       emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1569       DONE;
1570     }
1571 })
1572
;; Anonymous store pattern: a V16QI VSX register is stored to memory with its
;; sixteen elements in reversed order (vec_select indices 15..0), using
;; stxvb16x.  Enabled only for little-endian VSX with TARGET_P9_VECTOR.
1573 (define_insn "*vsx_st_elemrev_v16qi_internal"
1574   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1575         (vec_select:V16QI
1576           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1577           (parallel [(const_int 15) (const_int 14)
1578                      (const_int 13) (const_int 12)
1579                      (const_int 11) (const_int 10)
1580                      (const_int  9) (const_int  8)
1581                      (const_int  7) (const_int  6)
1582                      (const_int  5) (const_int  4)
1583                      (const_int  3) (const_int  2)
1584                      (const_int  1) (const_int  0)])))]
1585   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1586   "stxvb16x %x1,%y0"
1587   [(set_attr "type" "vecstore")])
1588
1589 \f
1590 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1591 ;; instructions are now combined with the insn for the traditional floating
1592 ;; point unit.
;; Vector add over the VSX_F modes: operand 0 = operand 1 + operand 2, all VSX
;; registers; emits xvadd<VSs> (suffix substituted per mode).  The second
;; constraint alternative is disparaged ('?').
1593 (define_insn "*vsx_add<mode>3"
1594   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1595         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1596                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1597   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1598   "xvadd<VSs> %x0,%x1,%x2"
1599   [(set_attr "type" "<VStype_simple>")])
1600
;; Vector subtract over the VSX_F modes: operand 0 = operand 1 - operand 2;
;; emits xvsub<VSs>.  The second constraint alternative is disparaged ('?').
1601 (define_insn "*vsx_sub<mode>3"
1602   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1603         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1604                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1605   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1606   "xvsub<VSs> %x0,%x1,%x2"
1607   [(set_attr "type" "<VStype_simple>")])
1608
;; Vector multiply over the VSX_F modes: operand 0 = operand 1 * operand 2;
;; emits xvmul<VSs>.  The second constraint alternative is disparaged ('?').
1609 (define_insn "*vsx_mul<mode>3"
1610   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1611         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1612                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1613   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1614   "xvmul<VSs> %x0,%x1,%x2"
1615   [(set_attr "type" "<VStype_simple>")])
1616
1617 ; Emulate vector with scalar for vec_mul in V2DImode
;; Represented as UNSPEC_VSX_MULSD of two V2DI registers.  The output template
;; is "#", so the insn must be split; the split is only allowed before reload
;; has completed because it creates new DImode pseudos.
;; The split extracts element 0 of each input, multiplies them into op5, then
;; extracts element 1 of each input and multiplies them into op3, and finally
;; concatenates (op5, op3) into operand 0.  Each multiply uses gen_muldi3 when
;; TARGET_POWERPC64 and expand_mult followed by a move otherwise.
1618 (define_insn_and_split "vsx_mul_v2di"
1619   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1620         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1621                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1622                      UNSPEC_VSX_MULSD))]
1623   "VECTOR_MEM_VSX_P (V2DImode)"
1624   "#"
1625   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1626   [(const_int 0)]
1627 {
1628   rtx op0 = operands[0];
1629   rtx op1 = operands[1];
1630   rtx op2 = operands[2];
1631   rtx op3 = gen_reg_rtx (DImode);
1632   rtx op4 = gen_reg_rtx (DImode);
1633   rtx op5 = gen_reg_rtx (DImode);
1634   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1635   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1636   if (TARGET_POWERPC64)
1637     emit_insn (gen_muldi3 (op5, op3, op4));
1638   else
1639     {
1640       rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1641       emit_move_insn (op5, ret);
1642     }
1643   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1644   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1645   if (TARGET_POWERPC64)
1646     emit_insn (gen_muldi3 (op3, op3, op4));
1647   else
1648     {
1649       rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1650       emit_move_insn (op3, ret);
1651     }
1652   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1653   DONE;
1654 }
1655   [(set_attr "type" "mul")])
1656
;; Vector divide over the VSX_F modes: operand 0 = operand 1 / operand 2;
;; emits xvdiv<VSs>.  Uses the <VStype_div> scheduling type rather than the
;; simple one used by add/sub/mul.
1657 (define_insn "*vsx_div<mode>3"
1658   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1659         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1660                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1661   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1662   "xvdiv<VSs> %x0,%x1,%x2"
1663   [(set_attr "type" "<VStype_div>")])
1664
1665 ; Emulate vector with scalar for vec_div in V2DImode
;; Represented as UNSPEC_VSX_DIVSD of two V2DI registers.  The output template
;; is "#", so the insn must be split; the split is only allowed before reload
;; has completed because it creates new DImode pseudos.
;; The split divides element 0 of operand 1 by element 0 of operand 2 into
;; op5, then element 1 by element 1 into op3, and concatenates (op5, op3) into
;; operand 0.  Each divide uses gen_divdi3 when TARGET_POWERPC64; otherwise it
;; calls the DImode library function registered for sdiv_optab.
1666 (define_insn_and_split "vsx_div_v2di"
1667   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1668         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1669                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1670                      UNSPEC_VSX_DIVSD))]
1671   "VECTOR_MEM_VSX_P (V2DImode)"
1672   "#"
1673   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1674   [(const_int 0)]
1675 {
1676   rtx op0 = operands[0];
1677   rtx op1 = operands[1];
1678   rtx op2 = operands[2];
1679   rtx op3 = gen_reg_rtx (DImode);
1680   rtx op4 = gen_reg_rtx (DImode);
1681   rtx op5 = gen_reg_rtx (DImode);
1682   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1683   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1684   if (TARGET_POWERPC64)
1685     emit_insn (gen_divdi3 (op5, op3, op4));
1686   else
1687     {
1688       rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1689       rtx target = emit_library_call_value (libfunc,
1690                                             op5, LCT_NORMAL, DImode,
1691                                             op3, DImode,
1692                                             op4, DImode);
1693       emit_move_insn (op5, target);
1694     }
1695   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1696   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1697   if (TARGET_POWERPC64)
1698     emit_insn (gen_divdi3 (op3, op3, op4));
1699   else
1700     {
1701       rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1702       rtx target = emit_library_call_value (libfunc,
1703                                             op3, LCT_NORMAL, DImode,
1704                                             op3, DImode,
1705                                             op4, DImode);
1706       emit_move_insn (op3, target);
1707     }
1708   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1709   DONE;
1710 }
1711   [(set_attr "type" "div")])
1712
;; Unsigned counterpart of the V2DI divide emulation, represented as
;; UNSPEC_VSX_DIVUD of two V2DI registers.  The output template is "#", so the
;; insn must be split; the split is only allowed before reload has completed
;; because it creates new DImode pseudos.
;; The split divides element 0 by element 0 into op5, element 1 by element 1
;; into op3, and concatenates (op5, op3) into operand 0.  Each divide uses
;; gen_udivdi3 when TARGET_POWERPC64; otherwise it calls the DImode library
;; function registered for udiv_optab.
1713 (define_insn_and_split "vsx_udiv_v2di"
1714   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1715         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1716                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1717                      UNSPEC_VSX_DIVUD))]
1718   "VECTOR_MEM_VSX_P (V2DImode)"
1719   "#"
1720   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1721   [(const_int 0)]
1722 {
1723   rtx op0 = operands[0];
1724   rtx op1 = operands[1];
1725   rtx op2 = operands[2];
1726   rtx op3 = gen_reg_rtx (DImode);
1727   rtx op4 = gen_reg_rtx (DImode);
1728   rtx op5 = gen_reg_rtx (DImode);
1729   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1730   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1731   if (TARGET_POWERPC64)
1732     emit_insn (gen_udivdi3 (op5, op3, op4));
1733   else
1734     {
1735       rtx libfunc = optab_libfunc (udiv_optab, DImode);
1736       rtx target = emit_library_call_value (libfunc,
1737                                             op5, LCT_NORMAL, DImode,
1738                                             op3, DImode,
1739                                             op4, DImode);
1740       emit_move_insn (op5, target);
1741     }
1742   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1743   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1744   if (TARGET_POWERPC64)
1745     emit_insn (gen_udivdi3 (op3, op3, op4));
1746   else
1747     {
1748       rtx libfunc = optab_libfunc (udiv_optab, DImode);
1749       rtx target = emit_library_call_value (libfunc,
1750                                             op3, LCT_NORMAL, DImode,
1751                                             op3, DImode,
1752                                             op4, DImode);
1753       emit_move_insn (op3, target);
1754     }
1755   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1756   DONE;
1757 }
1758   [(set_attr "type" "div")])
1759
1760 ;; *tdiv* instruction returning the FG flag
;; Two-step expansion over the VSX_B modes: a fresh CCFP pseudo (operand 3,
;; created in the preparation code) receives UNSPEC_VSX_TDIV of operands 1 and
;; 2, then SI operand 0 is set to the GT test of that CCFP value against 0.
1761 (define_expand "vsx_tdiv<mode>3_fg"
1762   [(set (match_dup 3)
1763         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1764                       (match_operand:VSX_B 2 "vsx_register_operand")]
1765                      UNSPEC_VSX_TDIV))
1766    (set (match_operand:SI 0 "gpc_reg_operand")
1767         (gt:SI (match_dup 3)
1768                (const_int 0)))]
1769   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1770 {
1771   operands[3] = gen_reg_rtx (CCFPmode);
1772 })
1773
1774 ;; *tdiv* instruction returning the FE flag
;; Two-step expansion over the VSX_B modes: a fresh CCFP pseudo (operand 3,
;; created in the preparation code) receives UNSPEC_VSX_TDIV of operands 1 and
;; 2, then SI operand 0 is set to the EQ test of that CCFP value against 0.
1775 (define_expand "vsx_tdiv<mode>3_fe"
1776   [(set (match_dup 3)
1777         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1778                       (match_operand:VSX_B 2 "vsx_register_operand")]
1779                      UNSPEC_VSX_TDIV))
1780    (set (match_operand:SI 0 "gpc_reg_operand")
1781         (eq:SI (match_dup 3)
1782                (const_int 0)))]
1783   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1784 {
1785   operands[3] = gen_reg_rtx (CCFPmode);
1786 })
1787
;; Anonymous insn matching the UNSPEC_VSX_TDIV set of a CCFP register from two
;; VSX_B operands; emits x<VSv>tdiv<VSs> with the condition register as the
;; first assembler operand (%0, printed without the %x modifier).
1788 (define_insn "*vsx_tdiv<mode>3_internal"
1789   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1790         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1791                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1792                    UNSPEC_VSX_TDIV))]
1793   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1794   "x<VSv>tdiv<VSs> %0,%x1,%x2"
1795   [(set_attr "type" "<VStype_simple>")])
1796
;; UNSPEC_FRES of one VSX_F operand; emits xvre<VSs>.
;; NOTE(review): presumably the vector reciprocal-estimate instruction --
;; confirm against the ISA description of xvresp/xvredp.
1797 (define_insn "vsx_fre<mode>2"
1798   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1799         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1800                       UNSPEC_FRES))]
1801   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1802   "xvre<VSs> %x0,%x1"
1803   [(set_attr "type" "<VStype_simple>")])
1804
;; Vector negate over the VSX_F modes; emits xvneg<VSs>.
1805 (define_insn "*vsx_neg<mode>2"
1806   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1807         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1808   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1809   "xvneg<VSs> %x0,%x1"
1810   [(set_attr "type" "<VStype_simple>")])
1811
;; Vector absolute value over the VSX_F modes; emits xvabs<VSs>.
1812 (define_insn "*vsx_abs<mode>2"
1813   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1814         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1815   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1816   "xvabs<VSs> %x0,%x1"
1817   [(set_attr "type" "<VStype_simple>")])
1818
;; Negated absolute value, (neg (abs x)), over the VSX_F modes as a single
;; instruction; emits xvnabs<VSs>.
1819 (define_insn "vsx_nabs<mode>2"
1820   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1821         (neg:VSX_F
1822          (abs:VSX_F
1823           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1824   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1825   "xvnabs<VSs> %x0,%x1"
1826   [(set_attr "type" "<VStype_simple>")])
1827
;; Signed maximum (smax) of two VSX_F operands; emits xvmax<VSs>.
1828 (define_insn "vsx_smax<mode>3"
1829   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1830         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1831                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1832   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1833   "xvmax<VSs> %x0,%x1,%x2"
1834   [(set_attr "type" "<VStype_simple>")])
1835
;; Signed minimum (smin) of two VSX_F operands; emits xvmin<VSs>.
1836 (define_insn "*vsx_smin<mode>3"
1837   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1838         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1839                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1840   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1841   "xvmin<VSs> %x0,%x1,%x2"
1842   [(set_attr "type" "<VStype_simple>")])
1843
;; Vector square root over the VSX_F modes; emits xvsqrt<VSs> and uses the
;; <VStype_sqrt> scheduling type.
1844 (define_insn "*vsx_sqrt<mode>2"
1845   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1846         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1847   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1848   "xvsqrt<VSs> %x0,%x1"
1849   [(set_attr "type" "<VStype_sqrt>")])
1850
;; UNSPEC_RSQRT of one VSX_F operand; emits xvrsqrte<VSs>.
;; NOTE(review): presumably the reciprocal square-root estimate -- confirm
;; against the ISA description.
1851 (define_insn "*vsx_rsqrte<mode>2"
1852   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1853         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1854                       UNSPEC_RSQRT))]
1855   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1856   "xvrsqrte<VSs> %x0,%x1"
1857   [(set_attr "type" "<VStype_simple>")])
1858
1859 ;; *tsqrt* returning the fg flag
;; Two-step expansion over the VSX_B modes: a fresh CCFP pseudo (operand 2,
;; created in the preparation code) receives UNSPEC_VSX_TSQRT of operand 1,
;; then SI operand 0 is set to the GT test of that CCFP value against 0.
1860 (define_expand "vsx_tsqrt<mode>2_fg"
1861   [(set (match_dup 2)
1862         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1863                      UNSPEC_VSX_TSQRT))
1864    (set (match_operand:SI 0 "gpc_reg_operand")
1865         (gt:SI (match_dup 2)
1866                (const_int 0)))]
1867   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1868 {
1869   operands[2] = gen_reg_rtx (CCFPmode);
1870 })
1871
1872 ;; *tsqrt* returning the fe flag
;; Two-step expansion over the VSX_B modes: a fresh CCFP pseudo (operand 2,
;; created in the preparation code) receives UNSPEC_VSX_TSQRT of operand 1,
;; then SI operand 0 is set to the EQ test of that CCFP value against 0.
1873 (define_expand "vsx_tsqrt<mode>2_fe"
1874   [(set (match_dup 2)
1875         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1876                      UNSPEC_VSX_TSQRT))
1877    (set (match_operand:SI 0 "gpc_reg_operand")
1878         (eq:SI (match_dup 2)
1879                (const_int 0)))]
1880   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1881 {
1882   operands[2] = gen_reg_rtx (CCFPmode);
1883 })
1884
;; Anonymous insn matching the UNSPEC_VSX_TSQRT set of a CCFP register from
;; one VSX_B operand; emits x<VSv>tsqrt<VSs> with the condition register as
;; the first assembler operand.
1885 (define_insn "*vsx_tsqrt<mode>2_internal"
1886   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1887         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1888                      UNSPEC_VSX_TSQRT))]
1889   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1890   "x<VSv>tsqrt<VSs> %0,%x1"
1891   [(set_attr "type" "<VStype_simple>")])
1892
1893 ;; Fused vector multiply/add instructions. Support the classical Altivec
1894 ;; versions of fma, which allows the target to be a separate register from the
1895 ;; 3 inputs.  Under VSX, the target must be either the addend or the first
1896 ;; multiply.
1897
;; V4SF fused multiply-add: operand 0 = fma (operand 1, operand 2, operand 3).
;; Five alternatives.  Where operand 3 is tied to the output ("0") the
;; xvmaddasp form is used with operands 1 and 2 as sources; where operand 2 is
;; tied to the output the xvmaddmsp form is used with operands 1 and 3 as
;; sources.  The last alternative emits vmaddfp with all four operands
;; explicit.  '%' marks operands 1 and 2 as commutative.
1898 (define_insn "*vsx_fmav4sf4"
1899   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1900         (fma:V4SF
1901           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1902           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1903           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1904   "VECTOR_UNIT_VSX_P (V4SFmode)"
1905   "@
1906    xvmaddasp %x0,%x1,%x2
1907    xvmaddmsp %x0,%x1,%x3
1908    xvmaddasp %x0,%x1,%x2
1909    xvmaddmsp %x0,%x1,%x3
1910    vmaddfp %0,%1,%2,%3"
1911   [(set_attr "type" "vecfloat")])
1912
;; V2DF fused multiply-add: operand 0 = fma (operand 1, operand 2, operand 3).
;; Four alternatives.  Where operand 3 is tied to the output ("0") the
;; xvmaddadp form is used with operands 1 and 2 as sources; where operand 2 is
;; tied to the output the xvmaddmdp form is used with operands 1 and 3 as
;; sources.  '%' marks operands 1 and 2 as commutative.
1913 (define_insn "*vsx_fmav2df4"
1914   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1915         (fma:V2DF
1916           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1917           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1918           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1919   "VECTOR_UNIT_VSX_P (V2DFmode)"
1920   "@
1921    xvmaddadp %x0,%x1,%x2
1922    xvmaddmdp %x0,%x1,%x3
1923    xvmaddadp %x0,%x1,%x2
1924    xvmaddmdp %x0,%x1,%x3"
1925   [(set_attr "type" "vecdouble")])
1926
;; Fused multiply-subtract over the VSX_F modes, expressed as
;; fma (operand 1, operand 2, neg (operand 3)).  Alternatives with operand 3
;; tied to the output use xvmsuba<VSs>; alternatives with operand 2 tied to
;; the output use xvmsubm<VSs> with operand 3 as the remaining source.
1927 (define_insn "*vsx_fms<mode>4"
1928   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1929         (fma:VSX_F
1930           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1931           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1932           (neg:VSX_F
1933             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1934   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1935   "@
1936    xvmsuba<VSs> %x0,%x1,%x2
1937    xvmsubm<VSs> %x0,%x1,%x3
1938    xvmsuba<VSs> %x0,%x1,%x2
1939    xvmsubm<VSs> %x0,%x1,%x3"
1940   [(set_attr "type" "<VStype_mul>")])
1941
;; Negated fused multiply-add over the VSX_F modes, expressed as
;; neg (fma (operand 1, operand 2, operand 3)).  Alternatives with operand 3
;; tied to the output use xvnmadda<VSs>; alternatives with operand 2 tied to
;; the output use xvnmaddm<VSs>.
;; NOTE(review): unlike the sibling fma/fms/nfms patterns, operand 1 here has
;; no '%' commutative marker -- confirm whether that is intentional.
1942 (define_insn "*vsx_nfma<mode>4"
1943   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1944         (neg:VSX_F
1945          (fma:VSX_F
1946           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1947           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1948           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1949   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1950   "@
1951    xvnmadda<VSs> %x0,%x1,%x2
1952    xvnmaddm<VSs> %x0,%x1,%x3
1953    xvnmadda<VSs> %x0,%x1,%x2
1954    xvnmaddm<VSs> %x0,%x1,%x3"
1955   [(set_attr "type" "<VStype_mul>")])
1956
;; V4SF negated fused multiply-subtract, expressed as
;; neg (fma (operand 1, operand 2, neg (operand 3))).  Alternatives with
;; operand 3 tied to the output use xvnmsubasp; alternatives with operand 2
;; tied to the output use xvnmsubmsp; the last alternative emits vnmsubfp with
;; all four operands explicit.
1957 (define_insn "*vsx_nfmsv4sf4"
1958   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1959         (neg:V4SF
1960          (fma:V4SF
1961            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1962            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1963            (neg:V4SF
1964              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1965   "VECTOR_UNIT_VSX_P (V4SFmode)"
1966   "@
1967    xvnmsubasp %x0,%x1,%x2
1968    xvnmsubmsp %x0,%x1,%x3
1969    xvnmsubasp %x0,%x1,%x2
1970    xvnmsubmsp %x0,%x1,%x3
1971    vnmsubfp %0,%1,%2,%3"
1972   [(set_attr "type" "vecfloat")])
1973
;; V2DF negated fused multiply-subtract, expressed as
;; neg (fma (operand 1, operand 2, neg (operand 3))).  Alternatives with
;; operand 3 tied to the output use xvnmsubadp; alternatives with operand 2
;; tied to the output use xvnmsubmdp.
1974 (define_insn "*vsx_nfmsv2df4"
1975   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1976         (neg:V2DF
1977          (fma:V2DF
1978            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1979            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1980            (neg:V2DF
1981              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1982   "VECTOR_UNIT_VSX_P (V2DFmode)"
1983   "@
1984    xvnmsubadp %x0,%x1,%x2
1985    xvnmsubmdp %x0,%x1,%x3
1986    xvnmsubadp %x0,%x1,%x2
1987    xvnmsubmdp %x0,%x1,%x3"
1988   [(set_attr "type" "vecdouble")])
1989
1990 ;; Vector conditional expressions (no scalar version for these instructions)
;; Element-wise equality compare of two VSX_F operands producing a VSX_F
;; result; emits xvcmpeq<VSs> (the non-recording form, without the '.').
1991 (define_insn "vsx_eq<mode>"
1992   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1993         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1994                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1995   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1996   "xvcmpeq<VSs> %x0,%x1,%x2"
1997   [(set_attr "type" "<VStype_simple>")])
1998
;; Element-wise greater-than compare of two VSX_F operands producing a VSX_F
;; result; emits xvcmpgt<VSs> (the non-recording form, without the '.').
1999 (define_insn "vsx_gt<mode>"
2000   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2001         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2002                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2003   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2004   "xvcmpgt<VSs> %x0,%x1,%x2"
2005   [(set_attr "type" "<VStype_simple>")])
2006
;; Element-wise greater-or-equal compare of two VSX_F operands producing a
;; VSX_F result; emits xvcmpge<VSs> (the non-recording form, without the '.').
2007 (define_insn "*vsx_ge<mode>"
2008   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2009         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2010                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2011   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2012   "xvcmpge<VSs> %x0,%x1,%x2"
2013   [(set_attr "type" "<VStype_simple>")])
2014
2015 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2016 ;; indicate a combined status
;; Predicate form of the equality compare: one instruction both sets hard
;; register CR6 (wrapped in UNSPEC_PREDICATE) and writes the VSX_F compare
;; result to operand 0.  Emits the recording form xvcmpeq<VSs>. (with '.').
2017 (define_insn "*vsx_eq_<mode>_p"
2018   [(set (reg:CC CR6_REGNO)
2019         (unspec:CC
2020          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2021                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2022          UNSPEC_PREDICATE))
2023    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2024         (eq:VSX_F (match_dup 1)
2025                   (match_dup 2)))]
2026   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2027   "xvcmpeq<VSs>. %x0,%x1,%x2"
2028   [(set_attr "type" "<VStype_simple>")])
2029
;; Predicate form of the greater-than compare: one instruction both sets hard
;; register CR6 (wrapped in UNSPEC_PREDICATE) and writes the VSX_F compare
;; result to operand 0.  Emits the recording form xvcmpgt<VSs>. (with '.').
2030 (define_insn "*vsx_gt_<mode>_p"
2031   [(set (reg:CC CR6_REGNO)
2032         (unspec:CC
2033          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2034                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2035          UNSPEC_PREDICATE))
2036    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2037         (gt:VSX_F (match_dup 1)
2038                   (match_dup 2)))]
2039   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2040   "xvcmpgt<VSs>. %x0,%x1,%x2"
2041   [(set_attr "type" "<VStype_simple>")])
2042
;; Predicate form of the greater-or-equal compare: one instruction both sets
;; hard register CR6 (wrapped in UNSPEC_PREDICATE) and writes the VSX_F
;; compare result to operand 0.  Emits the recording form xvcmpge<VSs>.
2043 (define_insn "*vsx_ge_<mode>_p"
2044   [(set (reg:CC CR6_REGNO)
2045         (unspec:CC
2046          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2047                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2048          UNSPEC_PREDICATE))
2049    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2050         (ge:VSX_F (match_dup 1)
2051                   (match_dup 2)))]
2052   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2053   "xvcmpge<VSs>. %x0,%x1,%x2"
2054   [(set_attr "type" "<VStype_simple>")])
2055
2056 ;; Vector select
;; Select over the VSX_L modes: if operand 1 != 0 (operand 4 must be a zero
;; constant; comparison in CC mode) then operand 2 else operand 3.
;; Note the assembler operand order: xxsel receives operand 3 (the "else"
;; value) before operand 2 (the "then" value), with operand 1 as the mask.
2057 (define_insn "*vsx_xxsel<mode>"
2058   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2059         (if_then_else:VSX_L
2060          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2061                 (match_operand:VSX_L 4 "zero_constant" ""))
2062          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2063          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2064   "VECTOR_MEM_VSX_P (<MODE>mode)"
2065   "xxsel %x0,%x3,%x2,%x1"
2066   [(set_attr "type" "vecmove")])
2067
;; Same select as the CC-mode xxsel pattern, but matching a comparison in
;; CCUNS mode.  The emitted instruction and operand order are identical:
;; xxsel receives operand 3 (the "else" value) before operand 2 (the "then"
;; value), with operand 1 as the mask.
2068 (define_insn "*vsx_xxsel<mode>_uns"
2069   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2070         (if_then_else:VSX_L
2071          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2072                    (match_operand:VSX_L 4 "zero_constant" ""))
2073          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2074          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2075   "VECTOR_MEM_VSX_P (<MODE>mode)"
2076   "xxsel %x0,%x3,%x2,%x1"
2077   [(set_attr "type" "vecmove")])
2078
2079 ;; Copy sign
;; UNSPEC_COPYSIGN of two VSX_F operands; emits xvcpsgn<VSs>.
;; Note the assembler operand order: operand 2 is passed before operand 1.
;; NOTE(review): which operand supplies the sign vs. the magnitude follows
;; from the xvcpsgn ISA definition -- confirm there.
2080 (define_insn "vsx_copysign<mode>3"
2081   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2082         (unspec:VSX_F
2083          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2084           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2085          UNSPEC_COPYSIGN))]
2086   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2087   "xvcpsgn<VSs> %x0,%x2,%x1"
2088   [(set_attr "type" "<VStype_simple>")])
2089
2090 ;; For the conversions, limit the register class for the integer value to be
2091 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2092 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2093 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2094 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2095 ;; in allowing virtual registers.
;; Signed integer to floating-point conversion (float) from mode <VSI> to a
;; VSX_F mode; emits xvcvsx<VSc><VSs>.  Operands use gpc_reg_operand rather
;; than vsx_register_operand.
2096 (define_insn "vsx_float<VSi><mode>2"
2097   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2098         (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2099   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2100   "xvcvsx<VSc><VSs> %x0,%x1"
2101   [(set_attr "type" "<VStype_simple>")])
2102
;; Unsigned integer to floating-point conversion (unsigned_float) from mode
;; <VSI> to a VSX_F mode; emits xvcvux<VSc><VSs>.
2103 (define_insn "vsx_floatuns<VSi><mode>2"
2104   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2105         (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2106   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2107   "xvcvux<VSc><VSs> %x0,%x1"
2108   [(set_attr "type" "<VStype_simple>")])
2109
;; Floating-point to signed integer conversion (fix) from a VSX_F mode to mode
;; <VSI>; emits x<VSv>cv<VSs>sx<VSc>s.
2110 (define_insn "vsx_fix_trunc<mode><VSi>2"
2111   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2112         (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2113   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2114   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2115   [(set_attr "type" "<VStype_simple>")])
2116
;; Floating-point to unsigned integer conversion (unsigned_fix) from a VSX_F
;; mode to mode <VSI>; emits x<VSv>cv<VSs>ux<VSc>s.
2117 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2118   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2119         (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2120   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2121   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2122   [(set_attr "type" "<VStype_simple>")])
2123
2124 ;; Math rounding functions
;; UNSPEC_VSX_ROUND_I of one VSX_B operand (scalar DF or vector, per the VSX_B
;; iterator).  The pattern name and the emitted mnemonic are the same string,
;; x<VSv>r<VSs>i, built from mode attributes.
2125 (define_insn "vsx_x<VSv>r<VSs>i"
2126   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2127         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2128                       UNSPEC_VSX_ROUND_I))]
2129   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2130   "x<VSv>r<VSs>i %x0,%x1"
2131   [(set_attr "type" "<VStype_simple>")])
2132
;; UNSPEC_VSX_ROUND_IC of one VSX_B operand.  The pattern name and the emitted
;; mnemonic are the same string, x<VSv>r<VSs>ic, built from mode attributes.
2133 (define_insn "vsx_x<VSv>r<VSs>ic"
2134   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2135         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2136                       UNSPEC_VSX_ROUND_IC))]
2137   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2138   "x<VSv>r<VSs>ic %x0,%x1"
2139   [(set_attr "type" "<VStype_simple>")])
2140
;; Truncation expressed as a floating-point-mode (fix:VSX_F ...), i.e. source
;; and result are both in the same VSX_F mode; emits xvr<VSs>iz.
2141 (define_insn "vsx_btrunc<mode>2"
2142   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2143         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2144   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2145   "xvr<VSs>iz %x0,%x1"
2146   [(set_attr "type" "<VStype_simple>")])
2147
;; Anonymous pattern matching UNSPEC_FRIZ of one VSX_B operand (so it also
;; covers the scalar DF mode of that iterator); emits x<VSv>r<VSs>iz.
2148 (define_insn "*vsx_b2trunc<mode>2"
2149   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2150         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2151                       UNSPEC_FRIZ))]
2152   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2153   "x<VSv>r<VSs>iz %x0,%x1"
2154   [(set_attr "type" "<VStype_simple>")])
2155
;; Floor over the VSX_F modes, represented as UNSPEC_FRIM; emits xvr<VSs>im.
2156 (define_insn "vsx_floor<mode>2"
2157   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2158         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2159                       UNSPEC_FRIM))]
2160   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2161   "xvr<VSs>im %x0,%x1"
2162   [(set_attr "type" "<VStype_simple>")])
2163
;; Ceiling over the VSX_F modes, represented as UNSPEC_FRIP; emits xvr<VSs>ip.
2164 (define_insn "vsx_ceil<mode>2"
2165   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2166         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2167                       UNSPEC_FRIP))]
2168   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2169   "xvr<VSs>ip %x0,%x1"
2170   [(set_attr "type" "<VStype_simple>")])
2171
2172 \f
2173 ;; VSX convert to/from double vector
2174
2175 ;; Convert between single and double precision
2176 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2177 ;; scalar single precision instructions internally use the double format.
2178 ;; Prefer the altivec registers, since we likely will need to do a vperm
;; Generic single/double conversion over the VSX_SPDP modes, represented as
;; UNSPEC_VSX_CVSPDP.  The pattern name suffix, the emitted mnemonic, the
;; result mode and the scheduling type all come from mode attributes
;; (<VS_spdp_insn>, <VS_spdp_res>, <VS_spdp_type>) defined elsewhere.
2179 (define_insn "vsx_<VS_spdp_insn>"
2180   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2181         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2182                               UNSPEC_VSX_CVSPDP))]
2183   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2184   "<VS_spdp_insn> %x0,%x1"
2185   [(set_attr "type" "<VS_spdp_type>")])
2186
2187 ;; xscvspdp, represent the scalar SF type as V4SF
;; Takes a V4SF register operand and produces a DF result via
;; UNSPEC_VSX_CVSPDP; emits xscvspdp.
2188 (define_insn "vsx_xscvspdp"
2189   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2190         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2191                    UNSPEC_VSX_CVSPDP))]
2192   "VECTOR_UNIT_VSX_P (V4SFmode)"
2193   "xscvspdp %x0,%x1"
2194   [(set_attr "type" "fp")])
2195
2196 ;; Same as vsx_xscvspdp, but use SF as the type
;; Takes a V4SF register operand and produces an SF-mode result via
;; UNSPEC_VSX_CVSPDP; emits the same xscvspdp instruction as vsx_xscvspdp.
2197 (define_insn "vsx_xscvspdp_scalar2"
2198   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2199         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2200                    UNSPEC_VSX_CVSPDP))]
2201   "VECTOR_UNIT_VSX_P (V4SFmode)"
2202   "xscvspdp %x0,%x1"
2203   [(set_attr "type" "fp")])
2204
2205 ;; Generate xvcvhpsp instruction
;; Takes a V16QI register operand and produces a V4SF result via
;; UNSPEC_VSX_CVHPSP; emits xvcvhpsp.  Requires TARGET_P9_VECTOR.
2206 (define_insn "vsx_xvcvhpsp"
2207   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2208         (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2209                      UNSPEC_VSX_CVHPSP))]
2210   "TARGET_P9_VECTOR"
2211   "xvcvhpsp %x0,%x1"
2212   [(set_attr "type" "vecfloat")])
2213
2214 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2215 ;; format of scalars is actually DF.
;; Takes an SF register operand and produces a V4SF result; emits xscvdpsp.
;; Note that the RTL reuses UNSPEC_VSX_CVSPDP as its unspec code.
2216 (define_insn "vsx_xscvdpsp_scalar"
2217   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2218         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2219                      UNSPEC_VSX_CVSPDP))]
2220   "VECTOR_UNIT_VSX_P (V4SFmode)"
2221   "xscvdpsp %x0,%x1"
2222   [(set_attr "type" "fp")])
2223
2224 ;; ISA 2.07 xscvdpspn/xscvspdpn, which do not raise an exception on signalling NaNs
;; Takes a DF register operand and produces a V4SF result via
;; UNSPEC_VSX_CVDPSPN; emits xscvdpspn.  Enabled by TARGET_XSCVDPSPN.
2225 (define_insn "vsx_xscvdpspn"
2226   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2227         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2228                      UNSPEC_VSX_CVDPSPN))]
2229   "TARGET_XSCVDPSPN"
2230   "xscvdpspn %x0,%x1"
2231   [(set_attr "type" "fp")])
2232
;; Takes a V4SF register operand and produces a DF result via
;; UNSPEC_VSX_CVSPDPN; emits xscvspdpn.  Enabled by TARGET_XSCVSPDPN.
2233 (define_insn "vsx_xscvspdpn"
2234   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2235         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2236                    UNSPEC_VSX_CVSPDPN))]
2237   "TARGET_XSCVSPDPN"
2238   "xscvspdpn %x0,%x1"
2239   [(set_attr "type" "fp")])
2240
;; Variant of vsx_xscvdpspn whose input operand has SF mode instead of DF;
;; emits the same xscvdpspn instruction.  Enabled by TARGET_XSCVDPSPN.
2241 (define_insn "vsx_xscvdpspn_scalar"
2242   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2243         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2244                      UNSPEC_VSX_CVDPSPN))]
2245   "TARGET_XSCVDPSPN"
2246   "xscvdpspn %x0,%x1"
2247   [(set_attr "type" "fp")])
2248
2249 ;; Used by direct move to move a SFmode value from GPR to VSX register
;; SF-to-SF form of UNSPEC_VSX_CVSPDPN: both operands are SF-mode VSX
;; registers; emits xscvspdpn.  Enabled by TARGET_XSCVSPDPN.
2250 (define_insn "vsx_xscvspdpn_directmove"
2251   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2252         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2253                    UNSPEC_VSX_CVSPDPN))]
2254   "TARGET_XSCVSPDPN"
2255   "xscvspdpn %x0,%x1"
2256   [(set_attr "type" "fp")])
2257
2258 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2259
     ;; Convert V2DI operand 1 to V2DF operand 0 with vsx_xvcvsxddp, then,
     ;; when immediate operand 2 is nonzero, rescale the result in place via
     ;; rs6000_scale_v2df using the negated scale.
2260 (define_expand "vsx_xvcvsxddp_scale"
2261   [(match_operand:V2DF 0 "vsx_register_operand")
2262    (match_operand:V2DI 1 "vsx_register_operand")
2263    (match_operand:QI 2 "immediate_operand")]
2264   "VECTOR_UNIT_VSX_P (V2DFmode)"
2265 {
2266   rtx op0 = operands[0];
2267   rtx op1 = operands[1];
2268   int scale = INTVAL (operands[2]);
2269   emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2270   if (scale != 0)
2271     rs6000_scale_v2df (op0, op0, -scale);
2272   DONE;
2273 })
2274
     ;; xvcvsxddp: V2DI source (operand 1) to V2DF result (operand 0).
2275 (define_insn "vsx_xvcvsxddp"
2276   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2277         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2278                      UNSPEC_VSX_XVCVSXDDP))]
2279   "VECTOR_UNIT_VSX_P (V2DFmode)"
2280   "xvcvsxddp %x0,%x1"
2281   [(set_attr "type" "vecdouble")])
2282
     ;; Convert V2DI operand 1 to V2DF operand 0 with vsx_xvcvuxddp, then,
     ;; when immediate operand 2 is nonzero, rescale the result in place via
     ;; rs6000_scale_v2df using the negated scale.
2283 (define_expand "vsx_xvcvuxddp_scale"
2284   [(match_operand:V2DF 0 "vsx_register_operand")
2285    (match_operand:V2DI 1 "vsx_register_operand")
2286    (match_operand:QI 2 "immediate_operand")]
2287   "VECTOR_UNIT_VSX_P (V2DFmode)"
2288 {
2289   rtx op0 = operands[0];
2290   rtx op1 = operands[1];
2291   int scale = INTVAL (operands[2]);
2292   emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2293   if (scale != 0)
2294     rs6000_scale_v2df (op0, op0, -scale);
2295   DONE;
2296 })
2297
     ;; xvcvuxddp: V2DI source (operand 1) to V2DF result (operand 0).
2298 (define_insn "vsx_xvcvuxddp"
2299   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2300         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2301                      UNSPEC_VSX_XVCVUXDDP))]
2302   "VECTOR_UNIT_VSX_P (V2DFmode)"
2303   "xvcvuxddp %x0,%x1"
2304   [(set_attr "type" "vecdouble")])
2305
     ;; Convert V2DF operand 1 to V2DI operand 0 with vsx_xvcvdpsxds.  When
     ;; immediate operand 2 is nonzero the input is first scaled into a fresh
     ;; V2DF pseudo by rs6000_scale_v2df, leaving operand 1 unmodified.
2306 (define_expand "vsx_xvcvdpsxds_scale"
2307   [(match_operand:V2DI 0 "vsx_register_operand")
2308    (match_operand:V2DF 1 "vsx_register_operand")
2309    (match_operand:QI 2 "immediate_operand")]
2310   "VECTOR_UNIT_VSX_P (V2DFmode)"
2311 {
2312   rtx op0 = operands[0];
2313   rtx op1 = operands[1];
2314   rtx tmp;
2315   int scale = INTVAL (operands[2]);
2316   if (scale == 0)
2317     tmp = op1;
2318   else
2319     {
2320       tmp = gen_reg_rtx (V2DFmode);
2321       rs6000_scale_v2df (tmp, op1, scale);
2322     }
2323   emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2324   DONE;
2325 })
2326
2327 ;; convert vector of 64-bit floating point numbers to vector of
2328 ;; 64-bit signed integer
     ;; (V2DF operand 1 to V2DI operand 0, emitted as xvcvdpsxds.)
2329 (define_insn "vsx_xvcvdpsxds"
2330   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2331         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2332                      UNSPEC_VSX_XVCVDPSXDS))]
2333   "VECTOR_UNIT_VSX_P (V2DFmode)"
2334   "xvcvdpsxds %x0,%x1"
2335   [(set_attr "type" "vecdouble")])
2336
2337 ;; convert vector of 32-bit floating point numbers to vector of
2338 ;; 32-bit signed integer
     ;; (V4SF operand 1 to V4SI operand 0, emitted as xvcvspsxws.)
2339 (define_insn "vsx_xvcvspsxws"
2340   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2341         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2342                      UNSPEC_VSX_XVCVSPSXWS))]
2343   "VECTOR_UNIT_VSX_P (V4SFmode)"
2344   "xvcvspsxws %x0,%x1"
2345   [(set_attr "type" "vecfloat")])
2346
2347 ;; convert vector of 64-bit floating point numbers to vector of
2348 ;; 64-bit unsigned integer
     ;; When immediate operand 2 is nonzero the input is first scaled into a
     ;; fresh V2DF pseudo by rs6000_scale_v2df; the conversion itself is done
     ;; by vsx_xvcvdpuxds.
2349 (define_expand "vsx_xvcvdpuxds_scale"
2350   [(match_operand:V2DI 0 "vsx_register_operand")
2351    (match_operand:V2DF 1 "vsx_register_operand")
2352    (match_operand:QI 2 "immediate_operand")]
2353   "VECTOR_UNIT_VSX_P (V2DFmode)"
2354 {
2355   rtx op0 = operands[0];
2356   rtx op1 = operands[1];
2357   rtx tmp;
2358   int scale = INTVAL (operands[2]);
2359   if (scale == 0)
2360     tmp = op1;
2361   else
2362     {
2363       tmp = gen_reg_rtx (V2DFmode);
2364       rs6000_scale_v2df (tmp, op1, scale);
2365     }
2366   emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2367   DONE;
2368 })
2369
2370 ;; convert vector of 32-bit floating point numbers to vector of
2371 ;; 32-bit unsigned integer
     ;; NOTE(review): this unsigned pattern uses UNSPEC_VSX_XVCVSPSXWS, the
     ;; same unspec and operand modes as the signed vsx_xvcvspsxws pattern,
     ;; so the two conversions have identical RTL.  Confirm whether a
     ;; distinct unspec is needed to keep them from being treated as equal.
2372 (define_insn "vsx_xvcvspuxws"
2373   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2374         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2375                      UNSPEC_VSX_XVCVSPSXWS))]
2376   "VECTOR_UNIT_VSX_P (V4SFmode)"
2377   "xvcvspuxws %x0,%x1"
2378   [(set_attr "type" "vecfloat")])
2379
     ;; xvcvdpuxds: V2DF source (operand 1) to V2DI result (operand 0).
2380 (define_insn "vsx_xvcvdpuxds"
2381   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2382         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2383                      UNSPEC_VSX_XVCVDPUXDS))]
2384   "VECTOR_UNIT_VSX_P (V2DFmode)"
2385   "xvcvdpuxds %x0,%x1"
2386   [(set_attr "type" "vecdouble")])
2387
2388 ;; Convert from 64-bit to 32-bit types
2389 ;; Note, favor the Altivec registers since the usual use of these instructions
2390 ;; is in vector converts and we need to use the Altivec vperm instruction.
2391
     ;; xvcvdpsxws: V2DF source to V4SI result.  The first alternative targets
     ;; an Altivec register ("v"); the "wa" alternative is disparaged with "?".
2392 (define_insn "vsx_xvcvdpsxws"
2393   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2394         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2395                      UNSPEC_VSX_CVDPSXWS))]
2396   "VECTOR_UNIT_VSX_P (V2DFmode)"
2397   "xvcvdpsxws %x0,%x1"
2398   [(set_attr "type" "vecdouble")])
2399
     ;; xvcvdpuxws: V2DF source to V4SI result, with the same alternatives as
     ;; vsx_xvcvdpsxws.
2400 (define_insn "vsx_xvcvdpuxws"
2401   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2402         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2403                      UNSPEC_VSX_CVDPUXWS))]
2404   "VECTOR_UNIT_VSX_P (V2DFmode)"
2405   "xvcvdpuxws %x0,%x1"
2406   [(set_attr "type" "vecdouble")])
2407
     ;; xvcvsxdsp: V2DI source (operand 1) to V4SF result (operand 0).
2408 (define_insn "vsx_xvcvsxdsp"
2409   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2410         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2411                      UNSPEC_VSX_CVSXDSP))]
2412   "VECTOR_UNIT_VSX_P (V2DFmode)"
2413   "xvcvsxdsp %x0,%x1"
2414   [(set_attr "type" "vecfloat")])
2415
     ;; xvcvuxdsp: V2DI source (operand 1) to V4SF result (operand 0).
     ;; NOTE(review): the "type" attribute here is "vecdouble" while the
     ;; otherwise parallel vsx_xvcvsxdsp uses "vecfloat"; confirm which is
     ;; intended.
2416 (define_insn "vsx_xvcvuxdsp"
2417   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2418         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2419                      UNSPEC_VSX_CVUXDSP))]
2420   "VECTOR_UNIT_VSX_P (V2DFmode)"
2421   "xvcvuxdsp %x0,%x1"
2422   [(set_attr "type" "vecdouble")])
2423
     ;; V2DF source (operand 1) to V4SF result (operand 0).  Note that the
     ;; pattern is named vsx_xvcdpsp whereas the emitted mnemonic is xvcvdpsp.
2424 (define_insn "vsx_xvcdpsp"
2425   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2426         (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2427                      UNSPEC_VSX_XVCDPSP))]
2428   "VECTOR_UNIT_VSX_P (V2DFmode)"
2429   "xvcvdpsp %x0,%x1"
2430   [(set_attr "type" "vecdouble")])
2431
2432 ;; Convert from 32-bit to 64-bit types
2433 ;; Provide both vector and scalar targets
     ;; Vector-target form of xvcvsxwdp: V4SI source to V2DF result.
2434 (define_insn "vsx_xvcvsxwdp"
2435   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2436         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2437                      UNSPEC_VSX_CVSXWDP))]
2438   "VECTOR_UNIT_VSX_P (V2DFmode)"
2439   "xvcvsxwdp %x0,%x1"
2440   [(set_attr "type" "vecdouble")])
2441
     ;; Scalar-target form of xvcvsxwdp: the destination is a DF register and
     ;; the pattern only requires TARGET_VSX.
2442 (define_insn "vsx_xvcvsxwdp_df"
2443   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2444         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2445                    UNSPEC_VSX_CVSXWDP))]
2446   "TARGET_VSX"
2447   "xvcvsxwdp %x0,%x1"
2448   [(set_attr "type" "vecdouble")])
2449
     ;; Vector-target form of xvcvuxwdp: V4SI source to V2DF result.
2450 (define_insn "vsx_xvcvuxwdp"
2451   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2452         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2453                      UNSPEC_VSX_CVUXWDP))]
2454   "VECTOR_UNIT_VSX_P (V2DFmode)"
2455   "xvcvuxwdp %x0,%x1"
2456   [(set_attr "type" "vecdouble")])
2457
     ;; Scalar-target form of xvcvuxwdp: the destination is a DF register and
     ;; the pattern only requires TARGET_VSX.
2458 (define_insn "vsx_xvcvuxwdp_df"
2459   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2460         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2461                    UNSPEC_VSX_CVUXWDP))]
2462   "TARGET_VSX"
2463   "xvcvuxwdp %x0,%x1"
2464   [(set_attr "type" "vecdouble")])
2465
     ;; xvcvspsxds: V4SF source (operand 1) to V2DI result (operand 0).
2466 (define_insn "vsx_xvcvspsxds"
2467   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2468         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2469                      UNSPEC_VSX_CVSPSXDS))]
2470   "VECTOR_UNIT_VSX_P (V2DFmode)"
2471   "xvcvspsxds %x0,%x1"
2472   [(set_attr "type" "vecdouble")])
2473
     ;; xvcvspuxds: V4SF source (operand 1) to V2DI result (operand 0).
2474 (define_insn "vsx_xvcvspuxds"
2475   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2476         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2477                      UNSPEC_VSX_CVSPUXDS))]
2478   "VECTOR_UNIT_VSX_P (V2DFmode)"
2479   "xvcvspuxds %x0,%x1"
2480   [(set_attr "type" "vecdouble")])
2481
     ;; xvcvsxwsp: V4SI source (operand 1) to V4SF result (operand 0).
2482 (define_insn "vsx_xvcvsxwsp"
2483   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2484         (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2485                      UNSPEC_VSX_CVSXWSP))]
2486   "VECTOR_UNIT_VSX_P (V4SFmode)"
2487   "xvcvsxwsp %x0,%x1"
2488   [(set_attr "type" "vecfloat")])
2489
     ;; xvcvuxwsp: V4SI source (operand 1) to V4SF result (operand 0).
2490 (define_insn "vsx_xvcvuxwsp"
2491   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2492         (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2493                      UNSPEC_VSX_CVUXWSP))]
2494   "VECTOR_UNIT_VSX_P (V4SFmode)"
2495   "xvcvuxwsp %x0,%x1"
2496   [(set_attr "type" "vecfloat")])
2497
2498 ;; Generate float2 double
2499 ;; convert two double to float
     ;; Operands 1 and 2 are the two V2DF inputs and operand 0 is the V4SF
     ;; result; all code is emitted by rs6000_generate_float2_double_code.
2500 (define_expand "float2_v2df"
2501   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2502    (use (match_operand:V2DF 1 "register_operand" "wa"))
2503    (use (match_operand:V2DF 2 "register_operand" "wa"))]
2504  "VECTOR_UNIT_VSX_P (V4SFmode)"
2505 {
2506   rtx rtx_src1, rtx_src2, rtx_dst;
2507
2508   rtx_dst = operands[0];
2509   rtx_src1 = operands[1];
2510   rtx_src2 = operands[2];
2511
2512   rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2513   DONE;
2514 })
2515
2516 ;; Generate float2
2517 ;; convert two long long signed ints to float
     ;; Operands 1 and 2 are the two V2DI inputs and operand 0 is the V4SF
     ;; result; rs6000_generate_float2_code is called with its first
     ;; argument set to true.
2518 (define_expand "float2_v2di"
2519   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2520    (use (match_operand:V2DI 1 "register_operand" "wa"))
2521    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2522  "VECTOR_UNIT_VSX_P (V4SFmode)"
2523 {
2524   rtx rtx_src1, rtx_src2, rtx_dst;
2525
2526   rtx_dst = operands[0];
2527   rtx_src1 = operands[1];
2528   rtx_src2 = operands[2];
2529
2530   rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2531   DONE;
2532 })
2533
2534 ;; Generate uns_float2
2535 ;; convert two long long unsigned ints to float
     ;; Operands 1 and 2 are the two V2DI inputs and operand 0 is the V4SF
     ;; result.  The first argument of rs6000_generate_float2_code must be
     ;; false here so that this expander differs from the signed float2_v2di
     ;; (which passes true), mirroring vsigned2_v2df/vunsigned2_v2df.
2536 (define_expand "uns_float2_v2di"
2537   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2538    (use (match_operand:V2DI 1 "register_operand" "wa"))
2539    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2540  "VECTOR_UNIT_VSX_P (V4SFmode)"
2541 {
2542   rtx rtx_src1, rtx_src2, rtx_dst;
2543
2544   rtx_dst = operands[0];
2545   rtx_src1 = operands[1];
2546   rtx_src2 = operands[2];
2547
2548   rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2549   DONE;
2550 })
2551
2552 ;; Generate floate
2553 ;; convert double or long long signed to float
2554 ;; (Only even words are valid, BE numbering)
     ;; Iterates over VSX_D (V2DF, V2DI); <VF_sxddp> selects the xvcvdpsp or
     ;; xvcvsxdsp generator.  On big endian the converted vector is rotated
     ;; by 4 bytes with vsldoi; on little endian it is used directly.
2555 (define_expand "floate<mode>"
2556   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2557    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2558   "VECTOR_UNIT_VSX_P (V4SFmode)"
2559 {
2560   if (BYTES_BIG_ENDIAN)
2561     {
2562       /* Shift left one word to put even word correct location */
2563       rtx rtx_tmp;
2564       rtx rtx_val = GEN_INT (4);
2565
2566       rtx_tmp = gen_reg_rtx (V4SFmode);
2567       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2568       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2569                  rtx_tmp, rtx_tmp, rtx_val));
2570     }
2571   else
2572     emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2573
2574   DONE;
2575 })
2576
2577 ;; Generate uns_floate
2578 ;; convert long long unsigned to float
2579 ;; (Only even words are valid, BE numbering)
     ;; Uses vsx_xvcvuxdsp for the conversion.  On big endian the converted
     ;; vector is rotated by 4 bytes with vsldoi; on little endian it is used
     ;; directly.
2580 (define_expand "unsfloatev2di"
2581   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2582    (use (match_operand:V2DI 1 "register_operand" "wa"))]
2583   "VECTOR_UNIT_VSX_P (V4SFmode)"
2584 {
2585   if (BYTES_BIG_ENDIAN)
2586     {
2587       /* Shift left one word to put even word correct location */
2588       rtx rtx_tmp;
2589       rtx rtx_val = GEN_INT (4);
2590
2591       rtx_tmp = gen_reg_rtx (V4SFmode);
2592       emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2593       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2594                  rtx_tmp, rtx_tmp, rtx_val));
2595     }
2596   else
2597     emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2598
2599   DONE;
2600 })
2601
2602 ;; Generate floato
2603 ;; convert double or long long signed to float
2604 ;; (Only odd words are valid, BE numbering)
     ;; Iterates over VSX_D (V2DF, V2DI); <VF_sxddp> selects the xvcvdpsp or
     ;; xvcvsxdsp generator.  On big endian the conversion result is used
     ;; directly; on little endian it is rotated by 4 bytes with vsldoi.
2605 (define_expand "floato<mode>"
2606   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2607    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2608   "VECTOR_UNIT_VSX_P (V4SFmode)"
2609 {
2610   if (BYTES_BIG_ENDIAN)
2611     emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2612   else
2613     {
2614       /* Shift left one word to put odd word correct location */
2615       rtx rtx_tmp;
2616       rtx rtx_val = GEN_INT (4);
2617
2618       rtx_tmp = gen_reg_rtx (V4SFmode);
2619       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2620       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2621                  rtx_tmp, rtx_tmp, rtx_val));
2622     }
2623   DONE;
2624 })
2625
2626 ;; Generate uns_floato
2627 ;; convert long long unsigned to float
2628 ;; (Only odd words are valid, BE numbering)
     ;; Uses vsx_xvcvuxdsp for the conversion.  On big endian the result is
     ;; used directly; on little endian it is rotated by 4 bytes with vsldoi.
2629 (define_expand "unsfloatov2di"
2630  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2631   (use (match_operand:V2DI 1 "register_operand" "wa"))]
2632  "VECTOR_UNIT_VSX_P (V4SFmode)"
2633 {
2634   if (BYTES_BIG_ENDIAN)
2635     emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2636   else
2637     {
2638       /* Shift left one word to put odd word correct location */
2639       rtx rtx_tmp;
2640       rtx rtx_val = GEN_INT (4);
2641
2642       rtx_tmp = gen_reg_rtx (V4SFmode);
2643       emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2644       emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2645                  rtx_tmp, rtx_tmp, rtx_val));
2646     }
2647   DONE;
2648 })
2649
2650 ;; Generate vsigned2
2651 ;; convert two double float vectors to a vector of single precision ints
     ;; The expander always ends in DONE, so the unspec template below is not
     ;; emitted; the code comes from rs6000_generate_vsigned2_code, called
     ;; with signed_convert set to true.
2652 (define_expand "vsigned2_v2df"
2653   [(match_operand:V4SI 0 "register_operand" "=wa")
2654    (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2655                  (match_operand:V2DF 2 "register_operand" "wa")]
2656   UNSPEC_VSX_VSIGNED2)]
2657   "TARGET_VSX"
2658 {
2659   rtx rtx_src1, rtx_src2, rtx_dst;
2660   bool signed_convert=true;
2661
2662   rtx_dst = operands[0];
2663   rtx_src1 = operands[1];
2664   rtx_src2 = operands[2];
2665
2666   rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2667   DONE;
2668 })
2669
2670 ;; Generate vsignedo_v2df
2671 ;; signed double float to int convert odd word
     ;; Converts with vsx_xvcvdpsxws.  On big endian the result is rotated by
     ;; 12 bytes with vsldoi; on little endian it is used directly.
2672 (define_expand "vsignedo_v2df"
2673   [(set (match_operand:V4SI 0 "register_operand" "=wa")
2674         (match_operand:V2DF 1 "register_operand" "wa"))]
2675   "TARGET_VSX"
2676 {
2677   if (BYTES_BIG_ENDIAN)
2678     {
2679       rtx rtx_tmp;
2680       rtx rtx_val = GEN_INT (12);
2681       rtx_tmp = gen_reg_rtx (V4SImode);
2682
2683       emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2684
2685       /* Big endian word numbering for words in operand is 0 1 2 3.
2686          Take (rtx_tmp rtx_tmp) and shift left three words (12 bytes)
2687          0 1 2 3    0 1 2 3  =>  3 0 1 2
2688          Words 1 and 3 are now where they need to be for the result.  */
2689
2690       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2691                  rtx_tmp, rtx_val));
2692     }
2693   else
2694     /* Little endian word numbering for operand is 3 2 1 0.
2695        Result words 3 and 1 are where they need to be.  */
2696     emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2697
2698   DONE;
2699 }
2700   [(set_attr "type" "veccomplex")])
2701
2702 ;; Generate vsignede_v2df
2703 ;; signed double float to int even word
     ;; Converts with vsx_xvcvdpsxws.  On big endian the result is used
     ;; directly; on little endian it is rotated by 12 bytes with vsldoi.
2704 (define_expand "vsignede_v2df"
2705   [(set (match_operand:V4SI 0 "register_operand" "=v")
2706         (match_operand:V2DF 1 "register_operand" "v"))]
2707   "TARGET_VSX"
2708 {
2709   if (BYTES_BIG_ENDIAN)
2710     /* Big endian word numbering for words in operand is 0 1 2 3.
2711        Result words 0 and 2 are where they need to be.  */
2712     emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2713
2714   else
2715     {
2716       rtx rtx_tmp;
2717       rtx rtx_val = GEN_INT (12);
2718       rtx_tmp = gen_reg_rtx (V4SImode);
2719
2720       emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2721
2722       /* Little endian word numbering for operand is 3 2 1 0.
2723          take (operand[1] operand[1]) and shift left three words
2724          0 1 2 3   0 1 2 3  =>  3 0 1 2
2725          Words 0 and 2 are now where they need to be for the result.  */
2726       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2727                  rtx_tmp, rtx_val));
2728     }
2729   DONE;
2730 }
2731   [(set_attr "type" "veccomplex")])
2732
2733 ;; Generate unsigned2
2734 ;; convert two double float vectors to a vector of single precision
2735 ;; unsigned ints
     ;; The expander always ends in DONE, so the unspec template below (which
     ;; names UNSPEC_VSX_VSIGNED2, as in vsigned2_v2df) is not emitted; the
     ;; code comes from rs6000_generate_vsigned2_code, called with
     ;; signed_convert set to false.
2736 (define_expand "vunsigned2_v2df"
2737 [(match_operand:V4SI 0 "register_operand" "=v")
2738  (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2739                (match_operand:V2DF 2 "register_operand" "v")]
2740               UNSPEC_VSX_VSIGNED2)]
2741  "TARGET_VSX"
2742 {
2743   rtx rtx_src1, rtx_src2, rtx_dst;
2744   bool signed_convert=false;
2745
2746   rtx_dst = operands[0];
2747   rtx_src1 = operands[1];
2748   rtx_src2 = operands[2];
2749
2750   rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2751   DONE;
2752 })
2753
2754 ;; Generate vunsignedo_v2df
2755 ;; unsigned double float to int convert odd word
     ;; Converts with vsx_xvcvdpuxws.  On big endian the result is rotated by
     ;; 12 bytes with vsldoi; on little endian it is used directly.
2756 (define_expand "vunsignedo_v2df"
2757   [(set (match_operand:V4SI 0 "register_operand" "=v")
2758         (match_operand:V2DF 1 "register_operand" "v"))]
2759   "TARGET_VSX"
2760 {
2761   if (BYTES_BIG_ENDIAN)
2762     {
2763       rtx rtx_tmp;
2764       rtx rtx_val = GEN_INT (12);
2765       rtx_tmp = gen_reg_rtx (V4SImode);
2766
2767       emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2768
2769       /* Big endian word numbering for words in operand is 0 1 2 3.
2770          Take (rtx_tmp rtx_tmp) and shift left three words (12 bytes)
2771          0 1 2 3    0 1 2 3  =>  3 0 1 2
2772          Words 1 and 3 are now where they need to be for the result.  */
2773
2774       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2775                  rtx_tmp, rtx_val));
2776     }
2777   else
2778     /* Little endian word numbering for operand is 3 2 1 0.
2779        Result words 3 and 1 are where they need to be.  */
2780     emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2781
2782   DONE;
2783 }
2784   [(set_attr "type" "veccomplex")])
2785
2786 ;; Generate vunsignede_v2df
2787 ;; unsigned double float to int even word
     ;; Converts with vsx_xvcvdpuxws.  On big endian the result is used
     ;; directly; on little endian it is rotated by 12 bytes with vsldoi.
2788 (define_expand "vunsignede_v2df"
2789   [(set (match_operand:V4SI 0 "register_operand" "=v")
2790         (match_operand:V2DF 1 "register_operand" "v"))]
2791   "TARGET_VSX"
2792 {
2793   if (BYTES_BIG_ENDIAN)
2794     /* Big endian word numbering for words in operand is 0 1 2 3.
2795        Result words 0 and 2 are where they need to be.  */
2796     emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2797
2798   else
2799     {
2800       rtx rtx_tmp;
2801       rtx rtx_val = GEN_INT (12);
2802       rtx_tmp = gen_reg_rtx (V4SImode);
2803
2804       emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2805
2806       /* Little endian word numbering for operand is 3 2 1 0.
2807          take (operand[1] operand[1]) and shift left three words
2808          0 1 2 3   0 1 2 3  =>  3 0 1 2
2809          Words 0 and 2 are now where they need to be for the result.  */
2810       emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2811                  rtx_tmp, rtx_val));
2812     }
2813   DONE;
2814 }
2815   [(set_attr "type" "veccomplex")])
2816
2817 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2818 ;; the xvrdpiz instruction does not truncate the value if the floating
2819 ;; point value is < LONG_MIN or > LONG_MAX.
     ;; The condition therefore requires flag_unsafe_math_optimizations,
     ;; !flag_trapping_math and TARGET_FRIZ in addition to VSX V2DF support.
2820 (define_insn "*vsx_float_fix_v2df2"
2821   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2822         (float:V2DF
2823          (fix:V2DI
2824           (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2825   "TARGET_HARD_FLOAT
2826    && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2827    && !flag_trapping_math && TARGET_FRIZ"
2828   "xvrdpiz %x0,%x1"
2829   [(set_attr "type" "vecdouble")])
2830
2831 \f
2832 ;; Permute operations
2833
2834 ;; Build a V2DF/V2DI vector from two scalars
     ;; Alternative 0 emits xxpermdi; alternative 1 (destination "we",
     ;; sources "b") emits mtvsrdd.  In both cases the two source operands
     ;; are swapped on little endian.
2835 (define_insn "vsx_concat_<mode>"
2836   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2837         (vec_concat:VSX_D
2838          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2839          (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2840   "VECTOR_MEM_VSX_P (<MODE>mode)"
2841 {
2842   if (which_alternative == 0)
2843     return (BYTES_BIG_ENDIAN
2844             ? "xxpermdi %x0,%x1,%x2,0"
2845             : "xxpermdi %x0,%x2,%x1,0");
2846
2847   else if (which_alternative == 1)
2848     return (BYTES_BIG_ENDIAN
2849             ? "mtvsrdd %x0,%1,%2"
2850             : "mtvsrdd %x0,%2,%1");
2851
2852   else
2853     gcc_unreachable ();
2854 }
2855   [(set_attr "type" "vecperm")])
2856
2857 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2858 ;; word element in a vector register.
     ;; Form 1: the first element is selected from vector operand 1 by the
     ;; constant in operand 2 (0 or 1); the second is scalar operand 3.
     ;; operands[4] is computed here as the xxpermdi immediate.
2859 (define_insn "*vsx_concat_<mode>_1"
2860   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2861         (vec_concat:VSX_D
2862          (vec_select:<VS_scalar>
2863           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2864           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2865          (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2866   "VECTOR_MEM_VSX_P (<MODE>mode)"
2867 {
2868   HOST_WIDE_INT dword = INTVAL (operands[2]);
2869   if (BYTES_BIG_ENDIAN)
2870     {
2871       operands[4] = GEN_INT (2*dword);
2872       return "xxpermdi %x0,%x1,%x3,%4";
2873     }
2874   else
2875     {
2876       operands[4] = GEN_INT (!dword);
2877       return "xxpermdi %x0,%x3,%x1,%4";
2878     }
2879 }
2880   [(set_attr "type" "vecperm")])
2881
     ;; Form 2: the first element is scalar operand 1; the second is selected
     ;; from vector operand 2 by the constant in operand 3 (0 or 1).
     ;; operands[4] is computed here as the xxpermdi immediate.
2882 (define_insn "*vsx_concat_<mode>_2"
2883   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2884         (vec_concat:VSX_D
2885          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2886          (vec_select:<VS_scalar>
2887           (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2888           (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2889   "VECTOR_MEM_VSX_P (<MODE>mode)"
2890 {
2891   HOST_WIDE_INT dword = INTVAL (operands[3]);
2892   if (BYTES_BIG_ENDIAN)
2893     {
2894       operands[4] = GEN_INT (dword);
2895       return "xxpermdi %x0,%x1,%x2,%4";
2896     }
2897   else
2898     {
2899       operands[4] = GEN_INT (2 * !dword);
2900       return "xxpermdi %x0,%x2,%x1,%4";
2901     }
2902 }
2903   [(set_attr "type" "vecperm")])
2904
     ;; Form 3: both elements are selected from vectors (operand 1 indexed by
     ;; operand 2, operand 3 indexed by operand 4).  operands[5] is computed
     ;; here as the xxpermdi immediate.
2905 (define_insn "*vsx_concat_<mode>_3"
2906   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2907         (vec_concat:VSX_D
2908          (vec_select:<VS_scalar>
2909           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2910           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2911          (vec_select:<VS_scalar>
2912           (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2913           (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2914   "VECTOR_MEM_VSX_P (<MODE>mode)"
2915 {
2916   HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2917   HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2918   if (BYTES_BIG_ENDIAN)
2919     {
2920       operands[5] = GEN_INT ((2 * dword1) + dword2);
2921       return "xxpermdi %x0,%x1,%x3,%5";
2922     }
2923   else
2924     {
2925       operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2926       return "xxpermdi %x0,%x3,%x1,%5";
2927     }
2928 }
2929   [(set_attr "type" "vecperm")])
2930
2931 ;; Special purpose concat using xxpermdi to glue two single precision values
2932 ;; together, relying on the fact that internally scalar floats are represented
2933 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
     ;; The two SF sources are passed to xxpermdi in swapped order on little
     ;; endian.
2934 (define_insn "vsx_concat_v2sf"
2935   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2936         (unspec:V2DF
2937          [(match_operand:SF 1 "vsx_register_operand" "ww")
2938           (match_operand:SF 2 "vsx_register_operand" "ww")]
2939          UNSPEC_VSX_CONCAT))]
2940   "VECTOR_MEM_VSX_P (V2DFmode)"
2941 {
2942   if (BYTES_BIG_ENDIAN)
2943     return "xxpermdi %x0,%x1,%x2,0";
2944   else
2945     return "xxpermdi %x0,%x2,%x1,0";
2946 }
2947   [(set_attr "type" "vecperm")])
2948
2949 ;; V4SImode initialization splitter
     ;; Takes four SI register-or-constant inputs plus two DI scratch
     ;; registers.  The insn is output as "#" and split after reload by
     ;; rs6000_split_v4si_init.
2950 (define_insn_and_split "vsx_init_v4si"
2951   [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2952         (unspec:V4SI
2953          [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2954           (match_operand:SI 2 "reg_or_cint_operand" "rn")
2955           (match_operand:SI 3 "reg_or_cint_operand" "rn")
2956           (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2957          UNSPEC_VSX_VEC_INIT))
2958    (clobber (match_scratch:DI 5 "=&r"))
2959    (clobber (match_scratch:DI 6 "=&r"))]
2960    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2961    "#"
2962    "&& reload_completed"
2963    [(const_int 0)]
2964 {
2965   rs6000_split_v4si_init (operands);
2966   DONE;
2967 })
2968
2969 ;; xxpermdi for little endian loads and stores.  We need several of
2970 ;; these since the form of the PARALLEL differs by mode.
     ;; Two-element (VSX_D) form: the selector swaps elements 0 and 1.
2971 (define_insn "*vsx_xxpermdi2_le_<mode>"
2972   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2973         (vec_select:VSX_D
2974           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2975           (parallel [(const_int 1) (const_int 0)])))]
2976   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2977   "xxpermdi %x0,%x1,%x1,2"
2978   [(set_attr "type" "vecperm")])
2979
     ;; Four-element (VSX_W) form: the selector places elements 2-3 before
     ;; elements 0-1.
2980 (define_insn "*vsx_xxpermdi4_le_<mode>"
2981   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2982         (vec_select:VSX_W
2983           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2984           (parallel [(const_int 2) (const_int 3)
2985                      (const_int 0) (const_int 1)])))]
2986   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2987   "xxpermdi %x0,%x1,%x1,2"
2988   [(set_attr "type" "vecperm")])
2989
     ;; Eight-element (V8HI) form: the selector places elements 4-7 before
     ;; elements 0-3.
2990 (define_insn "*vsx_xxpermdi8_le_V8HI"
2991   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2992         (vec_select:V8HI
2993           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2994           (parallel [(const_int 4) (const_int 5)
2995                      (const_int 6) (const_int 7)
2996                      (const_int 0) (const_int 1)
2997                      (const_int 2) (const_int 3)])))]
2998   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2999   "xxpermdi %x0,%x1,%x1,2"
3000   [(set_attr "type" "vecperm")])
3001
     ;; Sixteen-element (V16QI) form: the selector places elements 8-15
     ;; before elements 0-7.
3002 (define_insn "*vsx_xxpermdi16_le_V16QI"
3003   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3004         (vec_select:V16QI
3005           (match_operand:V16QI 1 "vsx_register_operand" "wa")
3006           (parallel [(const_int 8) (const_int 9)
3007                      (const_int 10) (const_int 11)
3008                      (const_int 12) (const_int 13)
3009                      (const_int 14) (const_int 15)
3010                      (const_int 0) (const_int 1)
3011                      (const_int 2) (const_int 3)
3012                      (const_int 4) (const_int 5)
3013                      (const_int 6) (const_int 7)])))]
3014   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3015   "xxpermdi %x0,%x1,%x1,2"
3016   [(set_attr "type" "vecperm")])
3017
3018 ;; lxvd2x for little endian loads.  We need several of
3019 ;; these since the form of the PARALLEL differs by mode.
     ;; Two-element (VSX_D) form; the source is a "Z" memory operand and the
     ;; pattern is disabled when TARGET_P9_VECTOR is set.
3020 (define_insn "*vsx_lxvd2x2_le_<mode>"
3021   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3022         (vec_select:VSX_D
3023           (match_operand:VSX_D 1 "memory_operand" "Z")
3024           (parallel [(const_int 1) (const_int 0)])))]
3025   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3026   "lxvd2x %x0,%y1"
3027   [(set_attr "type" "vecload")])
3028
     ;; Four-element (VSX_W) lxvd2x load form; disabled when TARGET_P9_VECTOR
     ;; is set.
3029 (define_insn "*vsx_lxvd2x4_le_<mode>"
3030   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3031         (vec_select:VSX_W
3032           (match_operand:VSX_W 1 "memory_operand" "Z")
3033           (parallel [(const_int 2) (const_int 3)
3034                      (const_int 0) (const_int 1)])))]
3035   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3036   "lxvd2x %x0,%y1"
3037   [(set_attr "type" "vecload")])
3038
     ;; Eight-element (V8HI) lxvd2x load form; disabled when TARGET_P9_VECTOR
     ;; is set.
3039 (define_insn "*vsx_lxvd2x8_le_V8HI"
3040   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3041         (vec_select:V8HI
3042           (match_operand:V8HI 1 "memory_operand" "Z")
3043           (parallel [(const_int 4) (const_int 5)
3044                      (const_int 6) (const_int 7)
3045                      (const_int 0) (const_int 1)
3046                      (const_int 2) (const_int 3)])))]
3047   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3048   "lxvd2x %x0,%y1"
3049   [(set_attr "type" "vecload")])
3050
     ;; Sixteen-element (V16QI) lxvd2x load form; disabled when
     ;; TARGET_P9_VECTOR is set.
3051 (define_insn "*vsx_lxvd2x16_le_V16QI"
3052   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3053         (vec_select:V16QI
3054           (match_operand:V16QI 1 "memory_operand" "Z")
3055           (parallel [(const_int 8) (const_int 9)
3056                      (const_int 10) (const_int 11)
3057                      (const_int 12) (const_int 13)
3058                      (const_int 14) (const_int 15)
3059                      (const_int 0) (const_int 1)
3060                      (const_int 2) (const_int 3)
3061                      (const_int 4) (const_int 5)
3062                      (const_int 6) (const_int 7)])))]
3063   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3064   "lxvd2x %x0,%y1"
3065   [(set_attr "type" "vecload")])
3066
3067 ;; stxvd2x for little endian stores.  We need several of
3068 ;; these since the form of the PARALLEL differs by mode.
     ;; Two-element (VSX_D) form; the destination is a "Z" memory operand and
     ;; the pattern is disabled when TARGET_P9_VECTOR is set.
3069 (define_insn "*vsx_stxvd2x2_le_<mode>"
3070   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3071         (vec_select:VSX_D
3072           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3073           (parallel [(const_int 1) (const_int 0)])))]
3074   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3075   "stxvd2x %x1,%y0"
3076   [(set_attr "type" "vecstore")])
3077
     ;; Four-element (VSX_W) stxvd2x store form; disabled when
     ;; TARGET_P9_VECTOR is set.
3078 (define_insn "*vsx_stxvd2x4_le_<mode>"
3079   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3080         (vec_select:VSX_W
3081           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3082           (parallel [(const_int 2) (const_int 3)
3083                      (const_int 0) (const_int 1)])))]
3084   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3085   "stxvd2x %x1,%y0"
3086   [(set_attr "type" "vecstore")])
3087
3088 (define_insn "*vsx_stxvd2x8_le_V8HI"
     ;; Little-endian V8HI store.  The PARALLEL lists elements 4..7 followed
     ;; by 0..3 (the two 8-byte halves swapped) and a single stxvd2x is
     ;; emitted.
3089   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3090         (vec_select:V8HI
3091           (match_operand:V8HI 1 "vsx_register_operand" "wa")
3092           (parallel [(const_int 4) (const_int 5)
3093                      (const_int 6) (const_int 7)
3094                      (const_int 0) (const_int 1)
3095                      (const_int 2) (const_int 3)])))]
3096   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3097   "stxvd2x %x1,%y0"
3098   [(set_attr "type" "vecstore")])
3099
3100 (define_insn "*vsx_stxvd2x16_le_V16QI"
     ;; Little-endian V16QI store.  The PARALLEL lists elements 8..15
     ;; followed by 0..7 (the two 8-byte halves swapped) and a single
     ;; stxvd2x is emitted.
3101   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3102         (vec_select:V16QI
3103           (match_operand:V16QI 1 "vsx_register_operand" "wa")
3104           (parallel [(const_int 8) (const_int 9)
3105                      (const_int 10) (const_int 11)
3106                      (const_int 12) (const_int 13)
3107                      (const_int 14) (const_int 15)
3108                      (const_int 0) (const_int 1)
3109                      (const_int 2) (const_int 3)
3110                      (const_int 4) (const_int 5)
3111                      (const_int 6) (const_int 7)])))]
3112   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3113   "stxvd2x %x1,%y0"
3114   [(set_attr "type" "vecstore")])
3115
3116 ;; Convert a TImode value into V1TImode
     ;; Operand 0 is the V1TI destination, operand 1 the original V1TI vector,
     ;; operand 2 the TImode value to insert and operand 3 the element number.
     ;; A V1TI vector has exactly one element, so the only valid element
     ;; number is 0 and the result is simply operand 2 viewed as V1TI;
     ;; operand 1 contributes nothing to the result.
3117 (define_expand "vsx_set_v1ti"
3118   [(match_operand:V1TI 0 "nonimmediate_operand")
3119    (match_operand:V1TI 1 "nonimmediate_operand")
3120    (match_operand:TI 2 "input_operand")
3121    (match_operand:QI 3 "u5bit_cint_operand")]
3122   "VECTOR_MEM_VSX_P (V1TImode)"
3123 {
3124   if (operands[3] != const0_rtx)
3125     gcc_unreachable ();
3126
     /* The inserted TImode value (operand 2) replaces the whole vector.  */
3127   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3128   DONE;
3129 })
3130
3131 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
     ;; Operand 0 is the destination, operand 1 the source vector, operand 2
     ;; the new scalar value and operand 3 the element number (0 or 1).  The
     ;; expander extracts the element that is NOT being replaced from operand 1
     ;; into a fresh scalar pseudo, then rebuilds the vector with vsx_concat:
     ;; the new value goes first when element 0 is set and second when element
     ;; 1 is set.
3132 (define_expand "vsx_set_<mode>"
3133   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3134    (use (match_operand:VSX_D 1 "vsx_register_operand"))
3135    (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3136    (use (match_operand:QI 3 "const_0_to_1_operand"))]
3137   "VECTOR_MEM_VSX_P (<MODE>mode)"
3138 {
3139   rtx dest = operands[0];
3140   rtx vec_reg = operands[1];
3141   rtx value = operands[2];
3142   rtx ele = operands[3];
3143   rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3144
3145   if (ele == const0_rtx)
3146     {
3147       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3148       emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3149       DONE;
3150     }
3151   else if (ele == const1_rtx)
3152     {
3153       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3154       emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3155       DONE;
3156     }
3157   else
3158     gcc_unreachable ();
3159 })
3160
3161 ;; Extract a DF/DI element from V2DF/V2DI
3162 ;; Optimize cases where we can do a simple or direct move.
3163 ;; Or see if we can avoid doing the move at all
     ;;
     ;; Output selection, in order:
     ;;  - element == VECTOR_ELEMENT_SCALAR_64BIT: nothing but an assembler
     ;;    comment if source and destination are the same register, otherwise
     ;;    mfvsrd (GPR destination with direct move on 64-bit), fmr (both in
     ;;    FPRs) or xxlor (any VSX destination).
     ;;  - element == VECTOR_ELEMENT_MFVSRLD_64BIT into a GPR with
     ;;    TARGET_P9_VECTOR, TARGET_POWERPC64 and TARGET_DIRECT_MOVE: mfvsrld.
     ;;  - any other case with a VSX destination: xxpermdi, with the
     ;;    immediate derived from the element number and adjusted for
     ;;    little endian.
3164
3165 ;; There are some unresolved problems with reload that show up if an Altivec
3166 ;; register was picked.  Limit the scalar value to FPRs for now.
3167
3168 (define_insn "vsx_extract_<mode>"
3169   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
3170
3171         (vec_select:<VS_scalar>
3172          (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
3173
3174          (parallel
3175           [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
3176   "VECTOR_MEM_VSX_P (<MODE>mode)"
3177 {
3178   int element = INTVAL (operands[2]);
3179   int op0_regno = REGNO (operands[0]);
3180   int op1_regno = REGNO (operands[1]);
3181   int fldDM;
3182
3183   gcc_assert (IN_RANGE (element, 0, 1));
3184   gcc_assert (VSX_REGNO_P (op1_regno));
3185
3186   if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3187     {
3188       if (op0_regno == op1_regno)
3189         return ASM_COMMENT_START " vec_extract to same register";
3190
3191       else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3192                && TARGET_POWERPC64)
3193         return "mfvsrd %0,%x1";
3194
3195       else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3196         return "fmr %0,%1";
3197
3198       else if (VSX_REGNO_P (op0_regno))
3199         return "xxlor %x0,%x1,%x1";
3200
3201       else
3202         gcc_unreachable ();
3203     }
3204
3205   else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3206            && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3207     return "mfvsrld %0,%x1";
3208
3209   else if (VSX_REGNO_P (op0_regno))
3210     {
3211       fldDM = element << 1;
3212       if (!BYTES_BIG_ENDIAN)
3213         fldDM = 3 - fldDM;
3214       operands[3] = GEN_INT (fldDM);
3215       return "xxpermdi %x0,%x1,%x1,%3";
3216     }
3217
3218   else
3219     gcc_unreachable ();
3220 }
3221   [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
3222
3223 ;; Optimize extracting a single scalar element from memory.
     ;; The vector operand is in memory and the element number is a constant
     ;; (0 or 1).  After reload the insn is split into a plain scalar load
     ;; from the address returned by rs6000_adjust_vec_address, which is
     ;; given the pointer-mode scratch register (operand 3) to work with.
3224 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3225   [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3226         (vec_select:<VSX_D:VS_scalar>
3227          (match_operand:VSX_D 1 "memory_operand" "m,m")
3228          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3229    (clobber (match_scratch:P 3 "=&b,&b"))]
3230   "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3231   "#"
3232   "&& reload_completed"
3233   [(set (match_dup 0) (match_dup 4))]
3234 {
3235   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3236                                            operands[3], <VSX_D:VS_scalar>mode);
3237 }
3238   [(set_attr "type" "fpload,load")
3239    (set_attr "length" "8")])
3240
3241 ;; Optimize storing a single scalar element that is already in the right
3242 ;; location of the register directly to memory.
     ;; Operand 2 must satisfy the vsx_scalar_64bit predicate.  The three
     ;; alternatives emit stfd, the indexed stxsd..x form, or stxsd.
3243 (define_insn "*vsx_extract_<mode>_store"
3244   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3245         (vec_select:<VS_scalar>
3246          (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3247          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3248   "VECTOR_MEM_VSX_P (<MODE>mode)"
3249   "@
3250    stfd%U0%X0 %1,%0
3251    stxsd%U0x %x1,%y0
3252    stxsd %1,%0"
3253   [(set_attr "type" "fpstore")
3254    (set_attr "length" "4")])
3255
3256 ;; Variable V2DI/V2DF extract shift
     ;; Wraps the vslo instruction as UNSPEC_VSX_VSLO: operand 1 is the
     ;; V2DI/V2DF source, operand 2 a V2DI shift-control vector, and the
     ;; result is produced in the scalar mode.  All operands use the "v"
     ;; constraint.  Requires TARGET_DIRECT_MOVE_64BIT.
3257 (define_insn "vsx_vslo_<mode>"
3258   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3259         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3260                              (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3261                             UNSPEC_VSX_VSLO))]
3262   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3263   "vslo %0,%1,%2"
3264   [(set_attr "type" "vecperm")])
3265
3266 ;; Variable V2DI/V2DF extract
     ;; The element number is in a register (operand 2, DImode).  The vector
     ;; may be in a register (alternative 0) or in memory (alternatives 1 and
     ;; 2).  Operand 3 is a DImode scratch and operand 4 a V2DI scratch that
     ;; is only allocated for the register alternative.  The whole sequence
     ;; is generated by rs6000_split_vec_extract_var after reload.
3267 (define_insn_and_split "vsx_extract_<mode>_var"
3268   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3269         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3270                              (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3271                             UNSPEC_VSX_EXTRACT))
3272    (clobber (match_scratch:DI 3 "=r,&b,&b"))
3273    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3274   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3275   "#"
3276   "&& reload_completed"
3277   [(const_int 0)]
3278 {
3279   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3280                                 operands[3], operands[4]);
3281   DONE;
3282 })
3283
3284 ;; Extract a SF element from V4SF
     ;; The split (always enabled, "&& 1") first converts the element number
     ;; to big-endian word numbering (3 - n on little endian).  If the word is
     ;; not already word 0 it is rotated into place with vsx_xxsldwi_v4sf,
     ;; using operand 3 (tied to operand 0 by the "=0" constraint, or a fresh
     ;; pseudo if it is still a SCRATCH).  The word is then converted with
     ;; vsx_xscvspdp_scalar2.
3285 (define_insn_and_split "vsx_extract_v4sf"
3286   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3287         (vec_select:SF
3288          (match_operand:V4SF 1 "vsx_register_operand" "wa")
3289          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3290    (clobber (match_scratch:V4SF 3 "=0"))]
3291   "VECTOR_UNIT_VSX_P (V4SFmode)"
3292   "#"
3293   "&& 1"
3294   [(const_int 0)]
3295 {
3296   rtx op0 = operands[0];
3297   rtx op1 = operands[1];
3298   rtx op2 = operands[2];
3299   rtx op3 = operands[3];
3300   rtx tmp;
3301   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3302
3303   if (ele == 0)
3304     tmp = op1;
3305   else
3306     {
3307       if (GET_CODE (op3) == SCRATCH)
3308         op3 = gen_reg_rtx (V4SFmode);
3309       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3310       tmp = op3;
3311     }
3312   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3313   DONE;
3314 }
3315   [(set_attr "length" "8")
3316    (set_attr "type" "fp")])
3317
3318 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
     ;; Extract a constant-numbered SF element (0..3) from a V4SF in memory.
     ;; After reload this becomes a scalar SFmode load from the address
     ;; returned by rs6000_adjust_vec_address, which is given the
     ;; pointer-mode scratch register (operand 3) to work with.
3319   [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3320         (vec_select:SF
3321          (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3322          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3323    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3324   "VECTOR_MEM_VSX_P (V4SFmode)"
3325   "#"
3326   "&& reload_completed"
3327   [(set (match_dup 0) (match_dup 4))]
3328 {
3329   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3330                                            operands[3], SFmode);
3331 }
3332   [(set_attr "type" "fpload,fpload,fpload,load")
3333    (set_attr "length" "8")])
3334
3335 ;; Variable V4SF extract
     ;; The element number is in a register (operand 2, DImode); the vector
     ;; is either in a register (alternative 0) or in memory (alternatives 1
     ;; and 2).  Operand 3 is a DImode scratch and operand 4 a V2DI scratch
     ;; that is only allocated for the register alternative.  The sequence is
     ;; generated by rs6000_split_vec_extract_var after reload.
3336 (define_insn_and_split "vsx_extract_v4sf_var"
3337   [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3338         (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3339                     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3340                    UNSPEC_VSX_EXTRACT))
3341    (clobber (match_scratch:DI 3 "=r,&b,&b"))
3342    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3343   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3344   "#"
3345   "&& reload_completed"
3346   [(const_int 0)]
3347 {
3348   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3349                                 operands[3], operands[4]);
3350   DONE;
3351 })
3352
3353 ;; Expand the builtin form of xxpermdi to canonical rtl.
     ;; Operand 3 is the xxpermdi immediate.  Bit 1 of it selects which
     ;; doubleword of operand 1 is taken (selector 0 or 1) and bit 0 selects
     ;; which doubleword of operand 2 is taken (selector 2 or 3 in the
     ;; concatenated vector).  V2DF uses the V2DF pattern directly; every
     ;; other mode goes through the V2DI pattern, with the operands viewed as
     ;; V2DI via gen_lowpart when the mode is not already V2DI.
3354 (define_expand "vsx_xxpermdi_<mode>"
3355   [(match_operand:VSX_L 0 "vsx_register_operand")
3356    (match_operand:VSX_L 1 "vsx_register_operand")
3357    (match_operand:VSX_L 2 "vsx_register_operand")
3358    (match_operand:QI 3 "u5bit_cint_operand")]
3359   "VECTOR_MEM_VSX_P (<MODE>mode)"
3360 {
3361   rtx target = operands[0];
3362   rtx op0 = operands[1];
3363   rtx op1 = operands[2];
3364   int mask = INTVAL (operands[3]);
3365   rtx perm0 = GEN_INT ((mask >> 1) & 1);
3366   rtx perm1 = GEN_INT ((mask & 1) + 2);
3367   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3368
3369   if (<MODE>mode == V2DFmode)
3370     gen = gen_vsx_xxpermdi2_v2df_1;
3371   else
3372     {
3373       gen = gen_vsx_xxpermdi2_v2di_1;
3374       if (<MODE>mode != V2DImode)
3375         {
3376           target = gen_lowpart (V2DImode, target);
3377           op0 = gen_lowpart (V2DImode, op0);
3378           op1 = gen_lowpart (V2DImode, op1);
3379         }
3380     }
3381   emit_insn (gen (target, op0, op1, perm0, perm1));
3382   DONE;
3383 })
3384
3385 ;; Special version of xxpermdi that retains big-endian semantics.
     ;; Operand 3 is decoded into two selectors exactly as in
     ;; vsx_xxpermdi_<mode>.  On big endian the selectors are passed through
     ;; unchanged.  On little endian the two inputs are swapped and the
     ;; selectors are replaced by 3 - perm1 and 3 - perm0, which cancels the
     ;; little-endian adjustment that vsx_xxpermdi2_<mode>_1 applies when it
     ;; outputs the instruction.
3386 (define_expand "vsx_xxpermdi_<mode>_be"
3387   [(match_operand:VSX_L 0 "vsx_register_operand")
3388    (match_operand:VSX_L 1 "vsx_register_operand")
3389    (match_operand:VSX_L 2 "vsx_register_operand")
3390    (match_operand:QI 3 "u5bit_cint_operand")]
3391   "VECTOR_MEM_VSX_P (<MODE>mode)"
3392 {
3393   rtx target = operands[0];
3394   rtx op0 = operands[1];
3395   rtx op1 = operands[2];
3396   int mask = INTVAL (operands[3]);
3397   rtx perm0 = GEN_INT ((mask >> 1) & 1);
3398   rtx perm1 = GEN_INT ((mask & 1) + 2);
3399   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3400
3401   if (<MODE>mode == V2DFmode)
3402     gen = gen_vsx_xxpermdi2_v2df_1;
3403   else
3404     {
3405       gen = gen_vsx_xxpermdi2_v2di_1;
3406       if (<MODE>mode != V2DImode)
3407         {
3408           target = gen_lowpart (V2DImode, target);
3409           op0 = gen_lowpart (V2DImode, op0);
3410           op1 = gen_lowpart (V2DImode, op1);
3411         }
3412     }
3413   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3414      transformation we don't want; it is necessary for
3415      rs6000_expand_vec_perm_const_1 but not for this use.  So we
3416      prepare for that by reversing the transformation here.  */
3417   if (BYTES_BIG_ENDIAN)
3418     emit_insn (gen (target, op0, op1, perm0, perm1));
3419   else
3420     {
3421       rtx p0 = GEN_INT (3 - INTVAL (perm1));
3422       rtx p1 = GEN_INT (3 - INTVAL (perm0));
3423       emit_insn (gen (target, op1, op0, p0, p1));
3424     }
3425   DONE;
3426 })
3427
3428 (define_insn "vsx_xxpermdi2_<mode>_1"
     ;; Canonical RTL form of xxpermdi for V2DF/V2DI: select two elements of
     ;; the four-element concatenation of operands 1 and 2.  Operand 3 (0..1)
     ;; picks the first result element from operand 1 and operand 4 (2..3)
     ;; picks the second from operand 2.  The output code turns the two
     ;; selectors into the instruction's immediate, (op3 << 1) | (op4 - 2),
     ;; after the little-endian adjustment described in the body, and
     ;; overwrites operands[3] with that immediate.
3429   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3430         (vec_select:VSX_D
3431           (vec_concat:<VS_double>
3432             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3433             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3434           (parallel [(match_operand 3 "const_0_to_1_operand" "")
3435                      (match_operand 4 "const_2_to_3_operand" "")])))]
3436   "VECTOR_MEM_VSX_P (<MODE>mode)"
3437 {
3438   int op3, op4, mask;
3439
3440   /* For little endian, swap operands and invert/swap selectors
3441      to get the correct xxpermdi.  The operand swap sets up the
3442      inputs as a little endian array.  The selectors are swapped
3443      because they are defined to use big endian ordering.  The
3444      selectors are inverted to get the correct doublewords for
3445      little endian ordering.  */
3446   if (BYTES_BIG_ENDIAN)
3447     {
3448       op3 = INTVAL (operands[3]);
3449       op4 = INTVAL (operands[4]);
3450     }
3451   else
3452     {
3453       op3 = 3 - INTVAL (operands[4]);
3454       op4 = 3 - INTVAL (operands[3]);
3455     }
3456
3457   mask = (op3 << 1) | (op4 - 2);
3458   operands[3] = GEN_INT (mask);
3459
3460   if (BYTES_BIG_ENDIAN)
3461     return "xxpermdi %x0,%x1,%x2,%3";
3462   else
3463     return "xxpermdi %x0,%x2,%x1,%3";
3464 }
3465   [(set_attr "type" "vecperm")])
3466
3467 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
3468 ;; none of the small types were allowed in a vector register, so we had to
3469 ;; extract to a DImode and either do a direct move or store.
     ;; When TARGET_P9_VECTOR is set the expander emits
     ;; vsx_extract_<mode>_p9 and is done; otherwise the template below (the
     ;; vec_select plus a vector-mode scratch clobber) is used.
3470 (define_expand  "vsx_extract_<mode>"
3471   [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3472                    (vec_select:<VS_scalar>
3473                     (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3474                     (parallel [(match_operand:QI 2 "const_int_operand")])))
3475               (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3476   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3477 {
3478   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
3479   if (TARGET_P9_VECTOR)
3480     {
3481       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3482                                             operands[2]));
3483       DONE;
3484     }
3485 })
3486
3487 (define_insn "vsx_extract_<mode>_p9"
     ;; Constant-element extract when TARGET_VEXTRACTUB is available.
     ;; Alternative 0 (GPR destination, with an SImode GPR scratch) outputs
     ;; "#" and is handled by a separate split.  Alternative 1 keeps the
     ;; value in a vector register: the element number is adjusted for little
     ;; endian, multiplied by the element size to form a byte offset that
     ;; replaces operands[2], and xxextractuw (4-byte elements) or
     ;; vextractu<wd> (smaller elements) is emitted.
3488   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3489         (vec_select:<VS_scalar>
3490          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3491          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3492    (clobber (match_scratch:SI 3 "=r,X"))]
3493   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3494 {
3495   if (which_alternative == 0)
3496     return "#";
3497
3498   else
3499     {
3500       HOST_WIDE_INT elt = INTVAL (operands[2]);
3501       HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3502                                ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3503                                : elt);
3504
3505       HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3506       HOST_WIDE_INT offset = unit_size * elt_adj;
3507
3508       operands[2] = GEN_INT (offset);
3509       if (unit_size == 4)
3510         return "xxextractuw %x0,%x1,%2";
3511       else
3512         return "vextractu<wd> %0,%1,%2";
3513     }
3514 }
3515   [(set_attr "type" "vecsimple")])
3516
3517 (define_split
     ;; Post-reload split for a constant-element extract from an Altivec
     ;; register into an integer register.  The byte offset (element number
     ;; times element size, with no endian adjustment) is loaded into the
     ;; SImode scratch (operand 3), then vextu<wd>lx is used on big endian
     ;; and vextu<wd>rx on little endian, writing the destination register
     ;; viewed as SImode.
3518   [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3519         (vec_select:<VS_scalar>
3520          (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3521          (parallel [(match_operand:QI 2 "const_int_operand")])))
3522    (clobber (match_operand:SI 3 "int_reg_operand"))]
3523   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3524   [(const_int 0)]
3525 {
3526   rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3527   rtx op1 = operands[1];
3528   rtx op2 = operands[2];
3529   rtx op3 = operands[3];
3530   HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3531
3532   emit_move_insn (op3, GEN_INT (offset));
3533   if (BYTES_BIG_ENDIAN)
3534     emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3535   else
3536     emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3537   DONE;
3538 })
3539
3540 ;; Optimize zero extracts to eliminate the AND after the extract.
     ;; Matches a zero_extend to DImode of a constant-element extract.  After
     ;; reload it is rewritten as the plain extract (keeping the SImode
     ;; scratch clobber) into operand 4, which is the destination hard
     ;; register viewed in the element's scalar mode; no separate extension
     ;; insn is generated.
3541 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3542   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3543         (zero_extend:DI
3544          (vec_select:<VS_scalar>
3545           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3546           (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3547    (clobber (match_scratch:SI 3 "=r,X"))]
3548   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3549   "#"
3550   "&& reload_completed"
3551   [(parallel [(set (match_dup 4)
3552                    (vec_select:<VS_scalar>
3553                     (match_dup 1)
3554                     (parallel [(match_dup 2)])))
3555               (clobber (match_dup 3))])]
3556 {
3557   operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3558 })
3559
3560 ;; Optimize stores to use the ISA 3.0 scalar store instructions
     ;; Matches a constant-element extract whose destination is memory.
     ;; After reload it is split into two insns: an extract into the scalar
     ;; scratch (operand 3), with operand 4 serving as that extract's SImode
     ;; clobber, followed by a store of operand 3 to the memory destination.
3561 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3562   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3563         (vec_select:<VS_scalar>
3564          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3565          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3566    (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3567    (clobber (match_scratch:SI 4 "=X,&r"))]
3568   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3569   "#"
3570   "&& reload_completed"
3571   [(parallel [(set (match_dup 3)
3572                    (vec_select:<VS_scalar>
3573                     (match_dup 1)
3574                     (parallel [(match_dup 2)])))
3575               (clobber (match_dup 4))])
3576    (set (match_dup 0)
3577         (match_dup 3))])
3578
3579 (define_insn_and_split  "*vsx_extract_si"
     ;; Constant-element V4SI extract used when TARGET_P9_VECTOR is not set.
     ;; After reload the element number is adjusted for little endian; unless
     ;; it is then 1, the word is splatted into the vector scratch with
     ;; vspltw.  The value is then transferred to the destination: for a
     ;; memory destination by an SImode move (TARGET_P8_VECTOR) or stfiwx;
     ;; for a register destination by an SImode move (TARGET_P8_VECTOR) or a
     ;; DImode register-to-register move.
3580   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3581         (vec_select:SI
3582          (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3583          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3584    (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3585   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3586   "#"
3587   "&& reload_completed"
3588   [(const_int 0)]
3589 {
3590   rtx dest = operands[0];
3591   rtx src = operands[1];
3592   rtx element = operands[2];
3593   rtx vec_tmp = operands[3];
3594   int value;
3595
3596   if (!BYTES_BIG_ENDIAN)
3597     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3598
3599   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3600      instruction.  */
3601   value = INTVAL (element);
3602   if (value != 1)
3603     emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3604   else
3605     vec_tmp = src;
3606
3607   if (MEM_P (operands[0]))
3608     {
3609       if (can_create_pseudo_p ())
3610         dest = rs6000_address_for_fpconvert (dest);
3611
3612       if (TARGET_P8_VECTOR)
3613         emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3614       else
3615         emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3616     }
3617
3618   else if (TARGET_P8_VECTOR)
3619     emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3620   else
3621     emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3622                     gen_rtx_REG (DImode, REGNO (vec_tmp)));
3623
3624   DONE;
3625 }
3626   [(set_attr "type" "mftgpr,vecperm,fpstore")
3627    (set_attr "length" "8")])
3628
3629 (define_insn_and_split  "*vsx_extract_<mode>_p8"
     ;; Constant-element V16QI/V8HI extract into a GPR, used when
     ;; TARGET_P9_VECTOR is not set.  After reload the element number is
     ;; adjusted for little endian; unless it is then 7 (V16QI) or 3 (V8HI),
     ;; the element is splatted into the vector scratch with vspltb/vsplth.
     ;; The result is then copied to the destination with a DImode
     ;; register-to-register move.
3630   [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3631         (vec_select:<VS_scalar>
3632          (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3633          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3634    (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3635   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3636    && !TARGET_P9_VECTOR"
3637   "#"
3638   "&& reload_completed"
3639   [(const_int 0)]
3640 {
3641   rtx dest = operands[0];
3642   rtx src = operands[1];
3643   rtx element = operands[2];
3644   rtx vec_tmp = operands[3];
3645   int value;
3646
3647   if (!BYTES_BIG_ENDIAN)
3648     element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3649
3650   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3651      instruction.  */
3652   value = INTVAL (element);
3653   if (<MODE>mode == V16QImode)
3654     {
3655       if (value != 7)
3656         emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3657       else
3658         vec_tmp = src;
3659     }
3660   else if (<MODE>mode == V8HImode)
3661     {
3662       if (value != 3)
3663         emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3664       else
3665         vec_tmp = src;
3666     }
3667   else
3668     gcc_unreachable ();
3669
3670   emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3671                   gen_rtx_REG (DImode, REGNO (vec_tmp)));
3672   DONE;
3673 }
3674   [(set_attr "type" "mftgpr")])
3675
3676 ;; Optimize extracting a single scalar element from memory.
     ;; Small-integer-vector version: the vector is in memory, the element
     ;; number is a constant and the result goes to a GPR.  After reload the
     ;; insn becomes a scalar load from the address returned by
     ;; rs6000_adjust_vec_address, which is given the DImode scratch
     ;; register (operand 3) to work with.
3677 (define_insn_and_split "*vsx_extract_<mode>_load"
3678   [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3679         (vec_select:<VS_scalar>
3680          (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3681          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3682    (clobber (match_scratch:DI 3 "=&b"))]
3683   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3684   "#"
3685   "&& reload_completed"
3686   [(set (match_dup 0) (match_dup 4))]
3687 {
3688   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3689                                            operands[3], <VS_scalar>mode);
3690 }
3691   [(set_attr "type" "load")
3692    (set_attr "length" "8")])
3693
3694 ;; Variable V16QI/V8HI/V4SI extract
     ;; The element number is in a register (operand 2, DImode) and the
     ;; result goes to a GPR.  The vector may be in a register (alternatives
     ;; 0 and 1) or in memory (alternative 2).  Operand 3 is a DImode scratch
     ;; and operand 4 a V2DI scratch that is only allocated for alternative
     ;; 1.  The sequence is generated by rs6000_split_vec_extract_var after
     ;; reload.
3695 (define_insn_and_split "vsx_extract_<mode>_var"
3696   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3697         (unspec:<VS_scalar>
3698          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3699           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3700          UNSPEC_VSX_EXTRACT))
3701    (clobber (match_scratch:DI 3 "=r,r,&b"))
3702    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3703   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3704   "#"
3705   "&& reload_completed"
3706   [(const_int 0)]
3707 {
3708   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3709                                 operands[3], operands[4]);
3710   DONE;
3711 })
3712
3713 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
     ;; Variable-element extract from V16QI/V8HI/V4SI combined with a
     ;; zero_extend to SImode or DImode.  After reload the extract is
     ;; generated by rs6000_split_vec_extract_var directly into the
     ;; destination hard register, viewed in the element's scalar mode, so no
     ;; separate extension insn is emitted.
3714   [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3715         (zero_extend:SDI
3716          (unspec:<VSX_EXTRACT_I:VS_scalar>
3717           [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3718            (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3719           UNSPEC_VSX_EXTRACT)))
3720    (clobber (match_scratch:DI 3 "=r,r,&b"))
3721    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3722   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3723   "#"
3724   "&& reload_completed"
3725   [(const_int 0)]
3726 {
     /* The destination must be viewed in the element (scalar) mode of the
        vector, not in the vector mode itself: it is a GPR that receives a
        single extracted element.  */
3727   machine_mode smode = <VSX_EXTRACT_I:VS_scalar>mode;
3728   rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3729                                 operands[1], operands[2],
3730                                 operands[3], operands[4]);
3731   DONE;
3732 })
3733
3734 ;; VSX_EXTRACT optimizations
3735 ;; Optimize double d = (double) vec_extract (vi, <n>)
3736 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
     ;; The split is always enabled ("&& 1").  The element number is adjusted
     ;; for little endian; unless it is then 0, the word is splatted with
     ;; vspltw into operand 3 (a fresh pseudo if it is still a SCRATCH).  The
     ;; conversion is emitted with vsx_xvcv<su>xwdp_df.
3737 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3738   [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3739         (any_float:DF
3740          (vec_select:SI
3741           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3742           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3743    (clobber (match_scratch:V4SI 3 "=v"))]
3744   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3745   "#"
3746   "&& 1"
3747   [(const_int 0)]
3748 {
3749   rtx dest = operands[0];
3750   rtx src = operands[1];
3751   rtx element = operands[2];
3752   rtx v4si_tmp = operands[3];
3753   int value;
3754
3755   if (!BYTES_BIG_ENDIAN)
3756     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3757
3758   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3759      instruction.  */
3760   value = INTVAL (element);
3761   if (value != 0)
3762     {
3763       if (GET_CODE (v4si_tmp) == SCRATCH)
3764         v4si_tmp = gen_reg_rtx (V4SImode);
3765       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3766     }
3767   else
3768     v4si_tmp = src;
3769
3770   emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3771   DONE;
3772 })
3773
3774 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3775 ;; where <type> is a floating point type that is supported by the hardware
3776 ;; and is not double.  First convert the value to double, and then to the
3777 ;; desired type.
     ;; The word is positioned and converted to DFmode as in
     ;; *vsx_extract_si_<uns>float_df, using operand 3 as the V4SI temporary
     ;; and operand 4 as the DFmode temporary (fresh pseudos if they are still
     ;; SCRATCH).  The DFmode value is then truncated to SFmode or extended to
     ;; the 128-bit mode with the generator matching <MODE>mode; any other
     ;; combination is unreachable.
3778 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3779   [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3780         (any_float:VSX_EXTRACT_FL
3781          (vec_select:SI
3782           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3783           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3784    (clobber (match_scratch:V4SI 3 "=v"))
3785    (clobber (match_scratch:DF 4 "=ws"))]
3786   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3787   "#"
3788   "&& 1"
3789   [(const_int 0)]
3790 {
3791   rtx dest = operands[0];
3792   rtx src = operands[1];
3793   rtx element = operands[2];
3794   rtx v4si_tmp = operands[3];
3795   rtx df_tmp = operands[4];
3796   int value;
3797
3798   if (!BYTES_BIG_ENDIAN)
3799     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3800
3801   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3802      instruction.  */
3803   value = INTVAL (element);
3804   if (value != 0)
3805     {
3806       if (GET_CODE (v4si_tmp) == SCRATCH)
3807         v4si_tmp = gen_reg_rtx (V4SImode);
3808       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3809     }
3810   else
3811     v4si_tmp = src;
3812
3813   if (GET_CODE (df_tmp) == SCRATCH)
3814     df_tmp = gen_reg_rtx (DFmode);
3815
3816   emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3817
3818   if (<MODE>mode == SFmode)
3819     emit_insn (gen_truncdfsf2 (dest, df_tmp));
3820   else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3821     emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3822   else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3823            && TARGET_FLOAT128_HW)
3824     emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3825   else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3826     emit_insn (gen_extenddfif2 (dest, df_tmp));
3827   else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3828     emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3829   else
3830     gcc_unreachable ();
3831
3832   DONE;
3833 })
3834
3835 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3836 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3837 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3838 ;; vector short or vector unsigned short.
     ;; This is the signed conversion.  After reload it is split into three
     ;; insns: the element extract into the scalar scratch (operand 3), a
     ;; sign_extend of that scratch to DImode in the same register (operand
     ;; 4), and the float conversion from DImode.
3839 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3840   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3841         (float:FL_CONV
3842          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3843           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3844           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3845    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3846   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3847    && TARGET_P9_VECTOR"
3848   "#"
3849   "&& reload_completed"
3850   [(parallel [(set (match_dup 3)
3851                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3852                     (match_dup 1)
3853                     (parallel [(match_dup 2)])))
3854               (clobber (scratch:SI))])
3855    (set (match_dup 4)
3856         (sign_extend:DI (match_dup 3)))
3857    (set (match_dup 0)
3858         (float:<FL_CONV:MODE> (match_dup 4)))]
3859 {
3860   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3861 })
3862
3863 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
     ;; Unsigned counterpart of the signed extract-and-convert pattern.
     ;; After reload it is split into the element extract into the scalar
     ;; scratch (operand 3) followed directly by a signed float conversion of
     ;; that register viewed as DImode (operand 4); there is no explicit
     ;; extension insn.
     ;; NOTE(review): this presumably relies on the extract leaving the
     ;; upper bits of the register zero, so the DImode value is non-negative
     ;; -- confirm against the extract instructions used.
3864   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3865         (unsigned_float:FL_CONV
3866          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3867           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3868           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3869    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3870   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3871    && TARGET_P9_VECTOR"
3872   "#"
3873   "&& reload_completed"
3874   [(parallel [(set (match_dup 3)
3875                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3876                     (match_dup 1)
3877                     (parallel [(match_dup 2)])))
3878               (clobber (scratch:SI))])
3879    (set (match_dup 0)
3880         (float:<FL_CONV:MODE> (match_dup 4)))]
3881 {
3882   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3883 })
3884
3885 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
     ;; Operand 1 (the vector being modified) is tied to the output, operand
     ;; 2 is the scalar to insert and operand 3 the constant element number.
     ;; The output code adjusts the element number for little endian,
     ;; multiplies it by the element size to get a byte offset that replaces
     ;; operands[3], and emits xxinsertw for V4SI or vinsert<wd> for the
     ;; other modes.
3886 (define_insn "vsx_set_<mode>_p9"
3887   [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3888         (unspec:VSX_EXTRACT_I
3889          [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3890           (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3891           (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3892          UNSPEC_VSX_SET))]
3893   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3894 {
3895   int ele = INTVAL (operands[3]);
3896   int nunits = GET_MODE_NUNITS (<MODE>mode);
3897
3898   if (!BYTES_BIG_ENDIAN)
3899     ele = nunits - 1 - ele;
3900
3901   operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3902   if (<MODE>mode == V4SImode)
3903     return "xxinsertw %x0,%x2,%3";
3904   else
3905     return "vinsert<wd> %0,%2,%3";
3906 }
3907   [(set_attr "type" "vecperm")])
3908
3909 (define_insn_and_split "vsx_set_v4sf_p9"
     ;; Insert the SFmode scalar in operand 2 into element operand 3 of the
     ;; V4SF vector in operand 1 (tied to operand 0).  Split after reload into
     ;; three insns that all go through the early-clobber scratch (operand 4):
     ;;   1. UNSPEC_VSX_CVDPSPN of operand 2 into the V4SF view of the scratch,
     ;;   2. a vec_select of word 0 (big endian) or 3 (little endian) from the
     ;;      V4SI view of the scratch back into the SImode scratch,
     ;;   3. UNSPEC_VSX_SET of that SImode value into the V4SI view of the
     ;;      destination register.
3910   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3911         (unspec:V4SF
3912          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3913           (match_operand:SF 2 "gpc_reg_operand" "ww")
3914           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3915          UNSPEC_VSX_SET))
3916    (clobber (match_scratch:SI 4 "=&wJwK"))]
3917   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3918   "#"
3919   "&& reload_completed"
3920   [(set (match_dup 5)
3921         (unspec:V4SF [(match_dup 2)]
3922                      UNSPEC_VSX_CVDPSPN))
3923    (parallel [(set (match_dup 4)
3924                    (vec_select:SI (match_dup 6)
3925                                   (parallel [(match_dup 7)])))
3926               (clobber (scratch:SI))])
3927    (set (match_dup 8)
3928         (unspec:V4SI [(match_dup 8)
3929                       (match_dup 4)
3930                       (match_dup 3)]
3931                      UNSPEC_VSX_SET))]
3932 {
3933   unsigned int tmp_regno = reg_or_subregno (operands[4]);
3934
       /* Operands 5 and 6 are the scratch register viewed as V4SF and V4SI;
          operand 7 is the word to pull out of it; operand 8 is the destination
          register viewed as V4SI.  */
3935   operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3936   operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3937   operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
3938   operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3939 }
3940   [(set_attr "type" "vecperm")
3941    (set_attr "length" "12")])
3942
3943 ;; Special case setting 0.0f to a V4SF element
3944 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
     ;; Operand 2 must satisfy zero_fp_constant.  Split after reload into two
     ;; insns: set the SImode scratch (operand 4) to integer zero, then insert
     ;; it with UNSPEC_VSX_SET into the V4SI view of the destination register
     ;; (operand 5) at element operand 3.
3945   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3946         (unspec:V4SF
3947          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3948           (match_operand:SF 2 "zero_fp_constant" "j")
3949           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3950          UNSPEC_VSX_SET))
3951    (clobber (match_scratch:SI 4 "=&wJwK"))]
3952   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3953   "#"
3954   "&& reload_completed"
3955   [(set (match_dup 4)
3956         (const_int 0))
3957    (set (match_dup 5)
3958         (unspec:V4SI [(match_dup 5)
3959                       (match_dup 4)
3960                       (match_dup 3)]
3961                      UNSPEC_VSX_SET))]
3962 {
       /* Operand 5 is the destination register re-expressed in V4SImode.  */
3963   operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3964 }
3965   [(set_attr "type" "vecperm")
3966    (set_attr "length" "8")])
3967
3968 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
3969 ;; that is in the default scalar position (1 for big endian, 2 for little
3970 ;; endian).  We just need to do an xxinsertw since the element is in the
3971 ;; correct location.
3972
3973 (define_insn "*vsx_insert_extract_v4sf_p9"
     ;; Matches only when the extracted element number (operand 3) is 1 on big
     ;; endian or 2 on little endian; operand 4 is the element of operand 1
     ;; (tied to operand 0) that is replaced.
3974   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3975         (unspec:V4SF
3976          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3977           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3978                          (parallel
3979                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3980           (match_operand:QI 4 "const_0_to_3_operand" "n")]
3981          UNSPEC_VSX_SET))]
3982   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
3983    && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
3984 {
3985   int ele = INTVAL (operands[4]);
3986
       /* On little endian the element numbering is reversed.  */
3987   if (!BYTES_BIG_ENDIAN)
3988     ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
3989
       /* Turn the element number into the byte offset printed as %4.  */
3990   operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
3991   return "xxinsertw %x0,%x2,%4";
3992 }
3993   [(set_attr "type" "vecperm")])
3994
3995 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
3996 ;; that is in the default scalar position (1 for big endian, 2 for little
3997 ;; endian).  Convert the insert/extract to int and avoid doing the conversion.
3998
3999 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
     ;; Matches when the extracted element number (operand 3) is anything
     ;; other than 1 on big endian or 2 on little endian.  The split condition
     ;; is "&& 1", so it does not wait for reload.  It becomes an SImode
     ;; vec_select from the V4SI view of operand 2 into operand 5, followed by
     ;; UNSPEC_VSX_SET on the V4SI views of operands 0 and 1.
4000   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4001         (unspec:V4SF
4002          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4003           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4004                          (parallel
4005                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4006           (match_operand:QI 4 "const_0_to_3_operand" "n")]
4007          UNSPEC_VSX_SET))
4008    (clobber (match_scratch:SI 5 "=&wJwK"))]
4009   "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4010    && TARGET_P9_VECTOR && TARGET_POWERPC64
4011    && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4012   "#"
4013   "&& 1"
4014   [(parallel [(set (match_dup 5)
4015                    (vec_select:SI (match_dup 6)
4016                                   (parallel [(match_dup 3)])))
4017               (clobber (scratch:SI))])
4018    (set (match_dup 7)
4019         (unspec:V4SI [(match_dup 8)
4020                       (match_dup 5)
4021                       (match_dup 4)]
4022                      UNSPEC_VSX_SET))]
4023 {
       /* If the scratch has not been assigned a register yet, use a new
          SImode pseudo for the intermediate value.  */
4024   if (GET_CODE (operands[5]) == SCRATCH)
4025     operands[5] = gen_reg_rtx (SImode);
4026
       /* V4SI views of the source (6), destination (7) and tied input (8).  */
4027   operands[6] = gen_lowpart (V4SImode, operands[2]);
4028   operands[7] = gen_lowpart (V4SImode, operands[0]);
4029   operands[8] = gen_lowpart (V4SImode, operands[1]);
4030 }
4031   [(set_attr "type" "vecperm")])
4032
4033 ;; Expanders for builtins
4034 (define_expand "vsx_mergel_<mode>"
     ;; Emits operand 0 = vec_select of elements 1 and 3 from the vec_concat
     ;; of operands 1 and 2, i.e. element 1 of each input vector.
4035   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4036    (use (match_operand:VSX_D 1 "vsx_register_operand"))
4037    (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4038   "VECTOR_MEM_VSX_P (<MODE>mode)"
4039 {
       /* Build the RTL by hand and emit it as a single SET.  */
4040   rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4041   rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4042   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4043   emit_insn (gen_rtx_SET (operands[0], x));
4044   DONE;
4045 })
4046
4047 (define_expand "vsx_mergeh_<mode>"
     ;; Emits operand 0 = vec_select of elements 0 and 2 from the vec_concat
     ;; of operands 1 and 2, i.e. element 0 of each input vector.
4048   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4049    (use (match_operand:VSX_D 1 "vsx_register_operand"))
4050    (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4051   "VECTOR_MEM_VSX_P (<MODE>mode)"
4052 {
       /* Build the RTL by hand and emit it as a single SET.  */
4053   rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4054   rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4055   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4056   emit_insn (gen_rtx_SET (operands[0], x));
4057   DONE;
4058 })
4059
4060 ;; V2DF/V2DI splat
4061 ;; We separate the register splat insn from the memory splat insn to force the
4062 ;; register allocator to generate the indexed form of the SPLAT when it is
4063 ;; given an offsettable memory reference.  Otherwise, if the register and
4064 ;; memory insns were combined into a single insn, the register allocator will
4065 ;; load the value into a register, and then do a double word permute.
4066 (define_expand "vsx_splat_<mode>"
     ;; Duplicate the scalar in operand 1 into both elements of operand 0.
     ;; The preparation code rewrites operand 1 so the emitted SET matches one
     ;; of the two insns below: a memory operand has its address converted by
     ;; rs6000_address_for_fpconvert, and anything that is neither memory nor
     ;; a REG is forced into a register.
4067   [(set (match_operand:VSX_D 0 "vsx_register_operand")
4068         (vec_duplicate:VSX_D
4069          (match_operand:<VS_scalar> 1 "input_operand")))]
4070   "VECTOR_MEM_VSX_P (<MODE>mode)"
4071 {
4072   rtx op1 = operands[1];
4073   if (MEM_P (op1))
4074     operands[1] = rs6000_address_for_fpconvert (op1);
4075   else if (!REG_P (op1))
       /* Store the forced register back into operands[1]; assigning only the
          local copy would leave the original operand in the emitted pattern.  */
4076     operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
4077 })
4078
4079 (define_insn "vsx_splat_<mode>_reg"
     ;; Register form of the splat.  Alternative 0 uses xxpermdi with the same
     ;; source register twice; alternative 1 (disparaged with "?") uses
     ;; mtvsrdd with the same "b" class register twice.
4080   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4081         (vec_duplicate:VSX_D
4082          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4083   "VECTOR_MEM_VSX_P (<MODE>mode)"
4084   "@
4085    xxpermdi %x0,%x1,%x1,0
4086    mtvsrdd %x0,%1,%1"
4087   [(set_attr "type" "vecperm")])
4088
4089 (define_insn "vsx_splat_<VSX_D:mode>_mem"
     ;; Memory form of the splat: load the scalar and duplicate it with a
     ;; single lxvdsx.  Operand 1 must satisfy the "Z" memory constraint.
4090   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4091         (vec_duplicate:VSX_D
4092          (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4093   "VECTOR_MEM_VSX_P (<MODE>mode)"
4094   "lxvdsx %x0,%y1"
4095   [(set_attr "type" "vecload")])
4096
4097 ;; V4SI splat support
4098 (define_insn "vsx_splat_v4si"
     ;; Requires TARGET_P9_VECTOR.  Alternative 0 splats an SImode value from
     ;; an "r" class register with mtvsrws; alternative 1 splats from memory
     ;; with lxvwsx.
4099   [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4100         (vec_duplicate:V4SI
4101          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4102   "TARGET_P9_VECTOR"
4103   "@
4104    mtvsrws %x0,%1
4105    lxvwsx %x0,%y1"
4106   [(set_attr "type" "vecperm,vecload")])
4107
4108 ;; SImode is not currently allowed in vector registers.  This pattern
4109 ;; allows us to use direct move to get the value in a vector register
4110 ;; so that we can use XXSPLTW
4111 (define_insn "vsx_splat_v4si_di"
     ;; Splat the SImode truncation of a DImode register.  Alternative 0 takes
     ;; the value from a "wj" class register and uses xxspltw with word index
     ;; 1; alternative 1 takes it from an "r" class register and uses mtvsrws.
4112   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4113         (vec_duplicate:V4SI
4114          (truncate:SI
4115           (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4116   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4117   "@
4118    xxspltw %x0,%x1,1
4119    mtvsrws %x0,%1"
4120   [(set_attr "type" "vecperm")])
4121
4122 ;; V4SF splat (ISA 3.0)
4123 (define_insn_and_split "vsx_splat_v4sf"
     ;; Three alternatives: memory source (lxvwsx), "wy" class register source
     ;; (emitted as "#" and split), and "r" class register source (mtvsrws).
     ;; The split runs after reload, only when operand 1 is a VSX register,
     ;; and produces UNSPEC_VSX_CVDPSPN into operand 0 followed by
     ;; UNSPEC_VSX_XXSPLTW of operand 0 with index 0.
4124   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4125         (vec_duplicate:V4SF
4126          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4127   "TARGET_P9_VECTOR"
4128   "@
4129    lxvwsx %x0,%y1
4130    #
4131    mtvsrws %x0,%1"
4132   "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4133   [(set (match_dup 0)
4134         (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4135    (set (match_dup 0)
4136         (unspec:V4SF [(match_dup 0)
4137                       (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4138   ""
4139   [(set_attr "type" "vecload,vecperm,mftgpr")
4140    (set_attr "length" "4,8,4")])
4141
4142 ;; V4SF/V4SI splat from a vector element
4143 (define_insn "vsx_xxspltw_<mode>"
     ;; Splat the element of operand 1 selected by operand 2, expressed as a
     ;; vec_duplicate of a vec_select.  The output code converts the element
     ;; number to the value printed in the instruction.
4144   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4145         (vec_duplicate:VSX_W
4146          (vec_select:<VS_scalar>
4147           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4148           (parallel
4149            [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4150   "VECTOR_MEM_VSX_P (<MODE>mode)"
4151 {
       /* On little endian the element numbering is reversed.  */
4152   if (!BYTES_BIG_ENDIAN)
4153     operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4154
4155   return "xxspltw %x0,%x1,%2";
4156 }
4157   [(set_attr "type" "vecperm")])
4158
4159 (define_insn "vsx_xxspltw_<mode>_direct"
     ;; UNSPEC form of xxspltw: operand 2 is printed into the instruction as
     ;; is, with no endian adjustment.
4160   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4161         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4162                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
4163                       UNSPEC_VSX_XXSPLTW))]
4164   "VECTOR_MEM_VSX_P (<MODE>mode)"
4165   "xxspltw %x0,%x1,%2"
4166   [(set_attr "type" "vecperm")])
4167
4168 ;; V16QI/V8HI splat support on ISA 2.07
4169 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
     ;; Splat the truncation of a DImode value held in an Altivec register.
     ;; The element index printed in the instruction comes from the
     ;; VSX_SPLAT_COUNT mode attribute.
4170   [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4171         (vec_duplicate:VSX_SPLAT_I
4172          (truncate:<VS_scalar>
4173           (match_operand:DI 1 "altivec_register_operand" "v"))))]
4174   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4175   "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4176   [(set_attr "type" "vecperm")])
4177
4178 ;; V2DF/V2DI splat for use by vec_splat builtin
4179 (define_insn "vsx_xxspltd_<mode>"
     ;; Splat one doubleword of operand 1 using xxpermdi with the same source
     ;; register twice.  Operand 2 selects the element.
4180   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4181         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4182                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
4183                       UNSPEC_VSX_XXSPLTD))]
4184   "VECTOR_MEM_VSX_P (<MODE>mode)"
4185 {
       /* Element 0 on big endian and element 1 on little endian use selector
          0; every other value of operand 2 uses selector 3.  */
4186   if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4187       || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4188     return "xxpermdi %x0,%x1,%x1,0";
4189   else
4190     return "xxpermdi %x0,%x1,%x1,3";
4191 }
4192   [(set_attr "type" "vecperm")])
4193
4194 ;; V4SF/V4SI interleave
4195 (define_insn "vsx_xxmrghw_<mode>"
     ;; Interleave elements 0 and 1 of operands 1 and 2 (selector 0,4,1,5 on
     ;; their concatenation).  Big endian prints xxmrghw; little endian prints
     ;; xxmrglw with the two inputs swapped.
4196   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4197         (vec_select:VSX_W
4198           (vec_concat:<VS_double>
4199             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4200             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4201           (parallel [(const_int 0) (const_int 4)
4202                      (const_int 1) (const_int 5)])))]
4203   "VECTOR_MEM_VSX_P (<MODE>mode)"
4204 {
4205   if (BYTES_BIG_ENDIAN)
4206     return "xxmrghw %x0,%x1,%x2";
4207   else
4208     return "xxmrglw %x0,%x2,%x1";
4209 }
4210   [(set_attr "type" "vecperm")])
4211
4212 (define_insn "vsx_xxmrglw_<mode>"
     ;; Interleave elements 2 and 3 of operands 1 and 2 (selector 2,6,3,7 on
     ;; their concatenation).  Big endian prints xxmrglw; little endian prints
     ;; xxmrghw with the two inputs swapped.
     ;; NOTE(review): operand 2's second alternative is "?<VSa>", whereas
     ;; vsx_xxmrghw_<mode> uses plain "<VSa>" there -- confirm whether the
     ;; extra "?" is intentional.
4213   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4214         (vec_select:VSX_W
4215           (vec_concat:<VS_double>
4216             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4217             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4218           (parallel [(const_int 2) (const_int 6)
4219                      (const_int 3) (const_int 7)])))]
4220   "VECTOR_MEM_VSX_P (<MODE>mode)"
4221 {
4222   if (BYTES_BIG_ENDIAN)
4223     return "xxmrglw %x0,%x1,%x2";
4224   else
4225     return "xxmrghw %x0,%x2,%x1";
4226 }
4227   [(set_attr "type" "vecperm")])
4228
4229 ;; Shift left double by word immediate
4230 (define_insn "vsx_xxsldwi_<mode>"
     ;; UNSPEC wrapper for xxsldwi: operands 1 and 2 are the two source
     ;; vectors and operand 3 is the immediate printed into the instruction.
4231   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4232         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4233                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4234                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
4235                       UNSPEC_VSX_SLDWI))]
4236   "VECTOR_MEM_VSX_P (<MODE>mode)"
4237   "xxsldwi %x0,%x1,%x2,%3"
4238   [(set_attr "type" "vecperm")])
4239
4240 \f
4241 ;; Vector reduction insns and splitters
4242
4243 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
     ;; V2DF reduction: applies the VEC_reduc operation to operand 1 and a copy
     ;; of operand 1 with its two elements swapped.  The split condition is
     ;; empty, and the split emits two insns: xxsldwi by 2 words into a
     ;; temporary, then the V2DF operation on the temporary and operand 1.
4244   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4245         (VEC_reduc:V2DF
4246          (vec_concat:V2DF
4247           (vec_select:DF
4248            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4249            (parallel [(const_int 1)]))
4250           (vec_select:DF
4251            (match_dup 1)
4252            (parallel [(const_int 0)])))
4253          (match_dup 1)))
4254    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4255   "VECTOR_UNIT_VSX_P (V2DFmode)"
4256   "#"
4257   ""
4258   [(const_int 0)]
4259 {
       /* Use a new pseudo when the scratch has not been allocated yet.  */
4260   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4261              ? gen_reg_rtx (V2DFmode)
4262              : operands[2];
4263   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4264   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4265   DONE;
4266 }
4267   [(set_attr "length" "8")
4268    (set_attr "type" "veccomplex")])
4269
4270 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
     ;; V4SF reduction, represented with an UNSPEC_REDUC placeholder as the
     ;; first argument of the VEC_reduc operation.  The split emits four
     ;; insns: xxsldwi by 2 words, the operation, xxsldwi by 3 words, and the
     ;; operation again into operand 0.
4271   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4272         (VEC_reduc:V4SF
4273          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4274          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4275    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4276    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4277   "VECTOR_UNIT_VSX_P (V4SFmode)"
4278   "#"
4279   ""
4280   [(const_int 0)]
4281 {
4282   rtx op0 = operands[0];
4283   rtx op1 = operands[1];
4284   rtx tmp2, tmp3, tmp4;
4285
       /* With pseudos available every intermediate gets its own register;
          otherwise the two scratch operands are used and tmp4 shares tmp2.  */
4286   if (can_create_pseudo_p ())
4287     {
4288       tmp2 = gen_reg_rtx (V4SFmode);
4289       tmp3 = gen_reg_rtx (V4SFmode);
4290       tmp4 = gen_reg_rtx (V4SFmode);
4291     }
4292   else
4293     {
4294       tmp2 = operands[2];
4295       tmp3 = operands[3];
4296       tmp4 = tmp2;
4297     }
4298
4299   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4300   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4301   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4302   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4303   DONE;
4304 }
4305   [(set_attr "length" "16")
4306    (set_attr "type" "veccomplex")])
4307
4308 ;; Combiner patterns with the vector reduction patterns that know we can get
4309 ;; to the top element of the V2DF array without doing an extract.
4310
4311 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
     ;; Matches the V2DF reduction wrapped in a vec_select of element 1, giving
     ;; a DFmode result.  The split emits two insns: an extract of element 1 of
     ;; operand 1 into a DFmode temporary, then the scalar DF operation on the
     ;; high part of operand 1 and that temporary.
4312   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4313         (vec_select:DF
4314          (VEC_reduc:V2DF
4315           (vec_concat:V2DF
4316            (vec_select:DF
4317             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4318             (parallel [(const_int 1)]))
4319            (vec_select:DF
4320             (match_dup 1)
4321             (parallel [(const_int 0)])))
4322           (match_dup 1))
4323          (parallel [(const_int 1)])))
4324    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4325   "VECTOR_UNIT_VSX_P (V2DFmode)"
4326   "#"
4327   ""
4328   [(const_int 0)]
4329 {
       /* HI is the DFmode high part of the input vector; LO is a new pseudo
          when the scratch has not been allocated yet.  */
4330   rtx hi = gen_highpart (DFmode, operands[1]);
4331   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4332             ? gen_reg_rtx (DFmode)
4333             : operands[2];
4334
4335   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4336   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4337   DONE;
4338 }
4339   [(set_attr "length" "8")
4340    (set_attr "type" "veccomplex")])
4341
4342 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
     ;; Matches the V4SF reduction wrapped in a vec_select of element 3, giving
     ;; an SFmode result.  The split emits the same four insns as the vector
     ;; V4SF reduction, with the last result going to a V4SF temporary, and
     ;; then gen_vsx_xscvspdp_scalar2 from that temporary into operand 0.
4343   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4344         (vec_select:SF
4345          (VEC_reduc:V4SF
4346           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4347           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4348          (parallel [(const_int 3)])))
4349    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4350    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4351    (clobber (match_scratch:V4SF 4 "=0,0"))]
4352   "VECTOR_UNIT_VSX_P (V4SFmode)"
4353   "#"
4354   ""
4355   [(const_int 0)]
4356 {
4357   rtx op0 = operands[0];
4358   rtx op1 = operands[1];
4359   rtx tmp2, tmp3, tmp4, tmp5;
4360
       /* With pseudos available every intermediate gets its own register;
          otherwise the scratch operands are used, tmp4 shares tmp2, and tmp5
          is scratch operand 4 (tied to operand 0 by its "0" constraint).  */
4361   if (can_create_pseudo_p ())
4362     {
4363       tmp2 = gen_reg_rtx (V4SFmode);
4364       tmp3 = gen_reg_rtx (V4SFmode);
4365       tmp4 = gen_reg_rtx (V4SFmode);
4366       tmp5 = gen_reg_rtx (V4SFmode);
4367     }
4368   else
4369     {
4370       tmp2 = operands[2];
4371       tmp3 = operands[3];
4372       tmp4 = tmp2;
4373       tmp5 = operands[4];
4374     }
4375
4376   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4377   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4378   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4379   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4380   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4381   DONE;
4382 }
4383   [(set_attr "length" "20")
4384    (set_attr "type" "veccomplex")])
4385
4386 \f
4387 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
4388 (define_peephole
     ;; Matches a load of a short constant into base register operand 0
     ;; followed by a vector load from (operand 0 + operand 3), and prints the
     ;; pair as one two-instruction sequence.  Disabled when TARGET_P9_VECTOR.
4389   [(set (match_operand:P 0 "base_reg_operand")
4390         (match_operand:P 1 "short_cint_operand"))
4391    (set (match_operand:VSX_M 2 "vsx_register_operand")
4392         (mem:VSX_M (plus:P (match_dup 0)
4393                            (match_operand:P 3 "int_reg_operand"))))]
4394   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4395   "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4396   [(set_attr "length" "8")
4397    (set_attr "type" "vecload")])
4398
4399 (define_peephole
     ;; Same fusion as the previous peephole, but matching the address with
     ;; the PLUS operands in the other order (operand 3 + operand 0).  The
     ;; printed instruction sequence is identical.
4400   [(set (match_operand:P 0 "base_reg_operand")
4401         (match_operand:P 1 "short_cint_operand"))
4402    (set (match_operand:VSX_M 2 "vsx_register_operand")
4403         (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4404                            (match_dup 0))))]
4405   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4406   "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4407   [(set_attr "length" "8")
4408    (set_attr "type" "vecload")])
4409
4410 \f
4411 ;; ISA 3.0 vector extend sign support
4412
4413 (define_insn "vsx_sign_extend_qi_<mode>"
     ;; UNSPEC wrapper for vextsb2<wd>: V16QI input in operand 1, result in a
     ;; VSINT_84 mode vector.  Both operands use the "v" constraint.
4414   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4415         (unspec:VSINT_84
4416          [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4417          UNSPEC_VSX_SIGN_EXTEND))]
4418   "TARGET_P9_VECTOR"
4419   "vextsb2<wd> %0,%1"
4420   [(set_attr "type" "vecexts")])
4421
4422 (define_insn "vsx_sign_extend_hi_<mode>"
     ;; UNSPEC wrapper for vextsh2<wd>: V8HI input in operand 1, result in a
     ;; VSINT_84 mode vector.  Both operands use the "v" constraint.
4423   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4424         (unspec:VSINT_84
4425          [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4426          UNSPEC_VSX_SIGN_EXTEND))]
4427   "TARGET_P9_VECTOR"
4428   "vextsh2<wd> %0,%1"
4429   [(set_attr "type" "vecexts")])
4430
4431 (define_insn "*vsx_sign_extend_si_v2di"
     ;; UNSPEC wrapper for vextsw2d: V4SI input in operand 1, V2DI result.
4432   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4433         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4434                      UNSPEC_VSX_SIGN_EXTEND))]
4435   "TARGET_P9_VECTOR"
4436   "vextsw2d %0,%1"
4437   [(set_attr "type" "vecexts")])
4438
4439 \f
4440 ;; ISA 3.0 Binary Floating-Point Support
4441
4442 ;; VSX Scalar Extract Exponent Quad-Precision
4443 (define_insn "xsxexpqp_<mode>"
     ;; DImode result and IEEE128 input are both in Altivec registers.  The
     ;; pattern uses the same UNSPEC_VSX_SXEXPDP code as xsxexpdp.
4444   [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4445         (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4446          UNSPEC_VSX_SXEXPDP))]
4447   "TARGET_P9_VECTOR"
4448   "xsxexpqp %0,%1"
4449   [(set_attr "type" "vecmove")])
4450
4451 ;; VSX Scalar Extract Exponent Double-Precision
4452 (define_insn "xsxexpdp"
     ;; DFmode input in a VSX register, DImode result in an "r" class
     ;; register.  Requires TARGET_64BIT in addition to TARGET_P9_VECTOR.
4453   [(set (match_operand:DI 0 "register_operand" "=r")
4454         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4455          UNSPEC_VSX_SXEXPDP))]
4456   "TARGET_P9_VECTOR && TARGET_64BIT"
4457   "xsxexpdp %0,%x1"
4458   [(set_attr "type" "integer")])
4459
4460 ;; VSX Scalar Extract Significand Quad-Precision
4461 (define_insn "xsxsigqp_<mode>"
     ;; TImode result and IEEE128 input are both in Altivec registers.
4462   [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4463         (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4464          UNSPEC_VSX_SXSIG))]
4465   "TARGET_P9_VECTOR"
4466   "xsxsigqp %0,%1"
4467   [(set_attr "type" "vecmove")])
4468
4469 ;; VSX Scalar Extract Significand Double-Precision
4470 (define_insn "xsxsigdp"
     ;; DFmode input in a VSX register, DImode result in an "r" class
     ;; register.  Requires TARGET_64BIT in addition to TARGET_P9_VECTOR.
4471   [(set (match_operand:DI 0 "register_operand" "=r")
4472         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4473          UNSPEC_VSX_SXSIG))]
4474   "TARGET_P9_VECTOR && TARGET_64BIT"
4475   "xsxsigdp %0,%x1"
4476   [(set_attr "type" "integer")])
4477
4478 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4479 (define_insn "xsiexpqpf_<mode>"
     ;; Variant of xsiexpqp whose operand 1 is an IEEE128 value rather than a
     ;; TImode value; operand 2 is the DImode exponent.  All operands are in
     ;; Altivec registers and the emitted instruction is the same xsiexpqp.
4480   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4481         (unspec:IEEE128
4482          [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4483           (match_operand:DI 2 "altivec_register_operand" "v")]
4484          UNSPEC_VSX_SIEXPQP))]
4485   "TARGET_P9_VECTOR"
4486   "xsiexpqp %0,%1,%2"
4487   [(set_attr "type" "vecmove")])
4488
4489 ;; VSX Scalar Insert Exponent Quad-Precision
4490 (define_insn "xsiexpqp_<mode>"
     ;; Operand 1 is a TImode value and operand 2 a DImode value; the result is
     ;; an IEEE128 value.  All operands are in Altivec registers.
4491   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4492         (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4493                          (match_operand:DI 2 "altivec_register_operand" "v")]
4494          UNSPEC_VSX_SIEXPQP))]
4495   "TARGET_P9_VECTOR"
4496   "xsiexpqp %0,%1,%2"
4497   [(set_attr "type" "vecmove")])
4498
4499 ;; VSX Scalar Insert Exponent Double-Precision
4500 (define_insn "xsiexpdp"
     ;; Both inputs are DImode values in "r" class registers; the DFmode result
     ;; is in a VSX register.  Requires TARGET_64BIT.
4501   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4502         (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4503                     (match_operand:DI 2 "register_operand" "r")]
4504          UNSPEC_VSX_SIEXPDP))]
4505   "TARGET_P9_VECTOR && TARGET_64BIT"
4506   "xsiexpdp %x0,%1,%2"
4507   [(set_attr "type" "fpsimple")])
4508
4509 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4510 (define_insn "xsiexpdpf"
     ;; Variant of xsiexpdp whose operand 1 is a DFmode value.  Note that it
     ;; is still constrained to an "r" class register, as is the DImode
     ;; operand 2; the emitted instruction is the same xsiexpdp.
4511   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4512         (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4513                     (match_operand:DI 2 "register_operand" "r")]
4514          UNSPEC_VSX_SIEXPDP))]
4515   "TARGET_P9_VECTOR && TARGET_64BIT"
4516   "xsiexpdp %x0,%1,%2"
4517   [(set_attr "type" "fpsimple")])
4518
4519 ;; VSX Scalar Compare Exponents Double-Precision
4520 (define_expand "xscmpexpdp_<code>"
     ;; One expander per CMP_TEST code.  It emits a CCFP compare of the
     ;; UNSPEC_VSX_SCMPEXPDP of operands 1 and 2 against zero into a new CCFP
     ;; pseudo (operand 3), then sets SImode operand 0 to the <code> test of
     ;; that register.
4521   [(set (match_dup 3)
4522         (compare:CCFP
4523          (unspec:DF
4524           [(match_operand:DF 1 "vsx_register_operand" "wa")
4525            (match_operand:DF 2 "vsx_register_operand" "wa")]
4526           UNSPEC_VSX_SCMPEXPDP)
4527          (const_int 0)))
4528    (set (match_operand:SI 0 "register_operand" "=r")
4529         (CMP_TEST:SI (match_dup 3)
4530                      (const_int 0)))]
4531   "TARGET_P9_VECTOR"
4532 {
4533   operands[3] = gen_reg_rtx (CCFPmode);
4534 })
4535
4536 (define_insn "*xscmpexpdp"
     ;; The compare insn itself.  Operand 3 must satisfy zero_constant and is
     ;; not printed; the result goes to a condition register ("y").
4537   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4538         (compare:CCFP
4539          (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4540                      (match_operand:DF 2 "vsx_register_operand" "wa")]
4541           UNSPEC_VSX_SCMPEXPDP)
4542          (match_operand:SI 3 "zero_constant" "j")))]
4543   "TARGET_P9_VECTOR"
4544   "xscmpexpdp %0,%x1,%x2"
4545   [(set_attr "type" "fpcompare")])
4546
4547 ;; VSX Scalar Test Data Class Quad-Precision
4548 ;;  (Expansion for scalar_test_data_class (__ieee128, int))
4549 ;;   (Has side effect of setting the lt bit if operand 1 is negative,
4550 ;;    setting the eq bit if any of the conditions tested by operand 2
4551 ;;    are satisfied, and clearing the gt and unordered bits to zero.)
4552 (define_expand "xststdcqp_<mode>"
     ;; Emits the CCFP compare of the UNSPEC_VSX_STSTDC of operand 1 and the
     ;; 7-bit constant in operand 2 into a new CCFP pseudo (operand 3), then
     ;; sets SImode operand 0 to the EQ test of that register.
4553   [(set (match_dup 3)
4554         (compare:CCFP
4555          (unspec:IEEE128
4556           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4557            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4558           UNSPEC_VSX_STSTDC)
4559          (const_int 0)))
4560    (set (match_operand:SI 0 "register_operand" "=r")
4561         (eq:SI (match_dup 3)
4562                (const_int 0)))]
4563   "TARGET_P9_VECTOR"
4564 {
4565   operands[3] = gen_reg_rtx (CCFPmode);
4566 })
4567
4568 ;; VSX Scalar Test Data Class Double- and Single-Precision
4569 ;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4570 ;;   if any of the conditions tested by operand 2 are satisfied.
4571 ;;   The gt and unordered bits are cleared to zero.)
4572 (define_expand "xststdc<Fvsx>"
     ;; Emits the CCFP compare of the UNSPEC_VSX_STSTDC of operand 1 and the
     ;; 7-bit constant in operand 2 into a new CCFP pseudo (operand 3), then
     ;; sets SImode operand 0 to the EQ test of that register.  The second
     ;; compare argument (operand 4) is created here as SImode zero.
4573   [(set (match_dup 3)
4574         (compare:CCFP
4575          (unspec:SFDF
4576           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4577            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4578           UNSPEC_VSX_STSTDC)
4579          (match_dup 4)))
4580    (set (match_operand:SI 0 "register_operand" "=r")
4581         (eq:SI (match_dup 3)
4582                (const_int 0)))]
4583   "TARGET_P9_VECTOR"
4584 {
4585   operands[3] = gen_reg_rtx (CCFPmode);
4586   operands[4] = CONST0_RTX (SImode);
4587 })
4588
4589 ;; The VSX Scalar Test Negative Quad-Precision
4590 (define_expand "xststdcnegqp_<mode>"
     ;; Same compare as xststdcqp_<mode> but with a fixed condition mask of 0,
     ;; and SImode operand 0 is set from the LT test of the new CCFP pseudo
     ;; (operand 2).
4591   [(set (match_dup 2)
4592         (compare:CCFP
4593          (unspec:IEEE128
4594           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4595            (const_int 0)]
4596           UNSPEC_VSX_STSTDC)
4597          (const_int 0)))
4598    (set (match_operand:SI 0 "register_operand" "=r")
4599         (lt:SI (match_dup 2)
4600                (const_int 0)))]
4601   "TARGET_P9_VECTOR"
4602 {
4603   operands[2] = gen_reg_rtx (CCFPmode);
4604 })
4605
4606 ;; The VSX Scalar Test Negative Double- and Single-Precision
4607 (define_expand "xststdcneg<Fvsx>"
     ;; Same compare as xststdc<Fvsx> but with a fixed condition mask of 0, and
     ;; SImode operand 0 is set from the LT test of the new CCFP pseudo
     ;; (operand 2).  The second compare argument (operand 3) is created here
     ;; as SImode zero.
4608   [(set (match_dup 2)
4609         (compare:CCFP
4610          (unspec:SFDF
4611           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4612            (const_int 0)]
4613           UNSPEC_VSX_STSTDC)
4614          (match_dup 3)))
4615    (set (match_operand:SI 0 "register_operand" "=r")
4616         (lt:SI (match_dup 2)
4617                (const_int 0)))]
4618   "TARGET_P9_VECTOR"
4619 {
4620   operands[2] = gen_reg_rtx (CCFPmode);
4621   operands[3] = CONST0_RTX (SImode);
4622 })
4623
4624 (define_insn "*xststdcqp_<mode>"
     ;; The quad-precision test-data-class insn.  Operand 0 has an empty
     ;; predicate and is constrained to a condition register ("y"); operand 2
     ;; is the 7-bit condition mask printed as an immediate.
4625   [(set (match_operand:CCFP 0 "" "=y")
4626         (compare:CCFP
4627          (unspec:IEEE128
4628           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4629            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4630           UNSPEC_VSX_STSTDC)
4631          (const_int 0)))]
4632   "TARGET_P9_VECTOR"
4633   "xststdcqp %0,%1,%2"
4634   [(set_attr "type" "fpcompare")])
4635
4636 (define_insn "*xststdc<Fvsx>"
     ;; The double- and single-precision test-data-class insn.  Operand 0 has
     ;; an empty predicate and is constrained to a condition register ("y");
     ;; operand 2 is the 7-bit condition mask; operand 3 must satisfy
     ;; zero_constant and is not printed.
4637   [(set (match_operand:CCFP 0 "" "=y")
4638         (compare:CCFP
4639          (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4640                        (match_operand:SI 2 "u7bit_cint_operand" "n")]
4641           UNSPEC_VSX_STSTDC)
4642          (match_operand:SI 3 "zero_constant" "j")))]
4643   "TARGET_P9_VECTOR"
4644   "xststdc<Fvsx> %0,%x1,%2"
4645   [(set_attr "type" "fpcompare")])
4646
4647 ;; VSX Vector Extract Exponent Double and Single Precision
4648 (define_insn "xvxexp<VSs>"
     ;; One-input UNSPEC; the result has the same VSX_F mode as the input.
4649   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4650         (unspec:VSX_F
4651          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4652          UNSPEC_VSX_VXEXP))]
4653   "TARGET_P9_VECTOR"
4654   "xvxexp<VSs> %x0,%x1"
4655   [(set_attr "type" "vecsimple")])
4656
4657 ;; VSX Vector Extract Significand Double and Single Precision
4658 (define_insn "xvxsig<VSs>"
     ;; One-input UNSPEC; the result has the same VSX_F mode as the input.
4659   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4660         (unspec:VSX_F
4661          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4662          UNSPEC_VSX_VXSIG))]
4663   "TARGET_P9_VECTOR"
4664   "xvxsig<VSs> %x0,%x1"
4665   [(set_attr "type" "vecsimple")])
4666
4667 ;; VSX Vector Insert Exponent Double and Single Precision
4668 (define_insn "xviexp<VSs>"
     ;; Two-input UNSPEC; both inputs and the result share the same VSX_F
     ;; mode.
4669   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4670         (unspec:VSX_F
4671          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4672           (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4673          UNSPEC_VSX_VIEXP))]
4674   "TARGET_P9_VECTOR"
4675   "xviexp<VSs> %x0,%x1,%x2"
4676   [(set_attr "type" "vecsimple")])
4677
4678 ;; VSX Vector Test Data Class Double and Single Precision
4679 ;; The corresponding elements of the result vector are all ones
4680 ;; if any of the conditions tested by operand 2 are satisfied.
4681 (define_insn "xvtstdc<VSs>"
     ;; Operand 1 is the floating point vector tested and operand 2 the 7-bit
     ;; condition mask; the result is in the <VSI> mode that corresponds to
     ;; the input mode.
4682   [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4683         (unspec:<VSI>
4684          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4685           (match_operand:SI 2 "u7bit_cint_operand" "n")]
4686          UNSPEC_VSX_VTSTDC))]
4687   "TARGET_P9_VECTOR"
4688   "xvtstdc<VSs> %x0,%x1,%2"
4689   [(set_attr "type" "vecsimple")])
4690
4691 ;; ISA 3.0 String Operations Support
4692
4693 ;; Compare vectors producing a vector result and a predicate, setting CR6
4694 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4695 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
4696 ;; need to match v4sf, v2df, or v2di modes because those are expanded
4697 ;; to use Power8 instructions.
4698 (define_insn "*vsx_ne_<mode>_p"
     ;; A two-set pattern: CR6 receives the UNSPEC_PREDICATE of the NE compare
     ;; and operand 0 receives the NE compare vector itself.  Both sets come
     ;; from the single record-form ("." suffix) vcmpne instruction.
4699   [(set (reg:CC CR6_REGNO)
4700         (unspec:CC
4701          [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4702                  (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4703          UNSPEC_PREDICATE))
4704    (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4705         (ne:VSX_EXTRACT_I (match_dup 1)
4706                           (match_dup 2)))]
4707   "TARGET_P9_VECTOR"
4708   "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4709   [(set_attr "type" "vecsimple")])
4710
4711 (define_insn "*vector_nez_<mode>_p"     ;; vcmpnez with vector result and CR6 predicate
4712   [(set (reg:CC CR6_REGNO)              ;; predicate result
4713         (unspec:CC [(unspec:VI
4714                      [(match_operand:VI 1 "gpc_reg_operand" "v")
4715                       (match_operand:VI 2 "gpc_reg_operand" "v")]
4716                      UNSPEC_NEZ_P)]
4717          UNSPEC_PREDICATE))
4718    (set (match_operand:VI 0 "gpc_reg_operand" "=v")     ;; vector result
4719         (unspec:VI [(match_dup 1)
4720                     (match_dup 2)]
4721          UNSPEC_NEZ_P))]
4722   "TARGET_P9_VECTOR"
4723   "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"        ;; "." form, pairs with the CR6 set
4724   [(set_attr "type" "vecsimple")])
4725
4726 ;; Return first position of match between vectors using natural order
4727 ;; for both LE and BE execution modes.
4728 (define_expand "first_match_index_<mode>"
4729   [(match_operand:SI 0 "register_operand")
4730    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4731                (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4732   UNSPEC_VSX_FIRST_MATCH_INDEX)]
4733   "TARGET_P9_VECTOR"
4734 {
4735   int sh;       /* Right shift that turns a byte count into an element index.  */
4736
4737   rtx cmp_result = gen_reg_rtx (<MODE>mode);
4738   rtx not_result = gen_reg_rtx (<MODE>mode);
4739   /* Not-equal-or-zero compare, then complement the compare result.  */
4740   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4741                                              operands[2]));
4742   emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4743
4744   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;  /* Element bytes / 2.  */
4745   /* LE uses the trailing count, BE the leading count, of NOT_RESULT.  */
4746   if (<MODE>mode == V16QImode)
4747     {
4748       if (!BYTES_BIG_ENDIAN)
4749         emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4750       else
4751         emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4752     }
4753   else
4754     {
4755       rtx tmp = gen_reg_rtx (SImode);   /* Count before scaling by SH.  */
4756       if (!BYTES_BIG_ENDIAN)
4757         emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4758       else
4759         emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4760       emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4761     }
4762   DONE;
4763 })
4764
4765 ;; Return first position of match between vectors or end of string (EOS) using
4766 ;; natural element order for both LE and BE execution modes.
4767 (define_expand "first_match_or_eos_index_<mode>"
4768   [(match_operand:SI 0 "register_operand")
4769    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4770                (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4771   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4772   "TARGET_P9_VECTOR"
4773 {
4774   int sh;       /* Right shift that turns a byte count into an element index.  */
4775   rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4776   rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4777   rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4778   rtx and_result = gen_reg_rtx (<MODE>mode);
4779   rtx result = gen_reg_rtx (<MODE>mode);
4780   rtx vzero = gen_reg_rtx (<MODE>mode);
4781
4782   /* Vector with zeros in elements that correspond to zeros in operands.  */
4783   emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4784   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4785   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4786   emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4787
4788   /* Vector with ones in elements that do not match (or hold a zero).  */
4789   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4790                                              operands[2]));
4791
4792   /* Create vector with ones in elements where there was a zero in one of
4793      the source elements or the elements that match.  */
4794   emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4795   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4796   /* LE uses the trailing count, BE the leading count, of RESULT.  */
4797   if (<MODE>mode == V16QImode)
4798     {
4799       if (!BYTES_BIG_ENDIAN)
4800         emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4801       else
4802         emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4803     }
4804   else
4805     {
4806       rtx tmp = gen_reg_rtx (SImode);   /* Count before scaling by SH.  */
4807       if (!BYTES_BIG_ENDIAN)
4808         emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4809       else
4810         emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4811       emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4812     }
4813   DONE;
4814 })
4815
4816 ;; Return first position of mismatch between vectors using natural
4817 ;; element order for both LE and BE execution modes.
4818 (define_expand "first_mismatch_index_<mode>"
4819   [(match_operand:SI 0 "register_operand")
4820    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4821                (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4822   UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4823   "TARGET_P9_VECTOR"
4824 {
4825   int sh;       /* Right shift that turns a byte count into an element index.  */
4826   rtx cmp_result = gen_reg_rtx (<MODE>mode);    /* Not-equal compare result.  */
4827
4828   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4829                                             operands[2]));
4830   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4831   /* LE uses the trailing count, BE the leading count, of CMP_RESULT.  */
4832   if (<MODE>mode == V16QImode)
4833     {
4834       if (!BYTES_BIG_ENDIAN)
4835         emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4836       else
4837         emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4838     }
4839   else
4840     {
4841       rtx tmp = gen_reg_rtx (SImode);   /* Count before scaling by SH.  */
4842       if (!BYTES_BIG_ENDIAN)
4843         emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4844       else
4845         emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4846       emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4847     }
4848   DONE;
4849 })
4850
4851 ;; Return first position of mismatch between vectors or end of string (EOS)
4852 ;; using natural element order for both LE and BE execution modes.
4853 (define_expand "first_mismatch_or_eos_index_<mode>"
4854   [(match_operand:SI 0 "register_operand")
4855    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4856                (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4857   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4858   "TARGET_P9_VECTOR"
4859 {
4860   int sh;       /* Right shift that turns a byte count into an element index.  */
4861   rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4862   rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4863   rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4864   rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4865   rtx and_result = gen_reg_rtx (<MODE>mode);
4866   rtx result = gen_reg_rtx (<MODE>mode);
4867   rtx vzero = gen_reg_rtx (<MODE>mode);
4868
4869   /* Vector with zeros in elements that correspond to zeros in operands.  */
4870   emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4871
4872   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4873   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4874   emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4875
4876   /* Vector with ones in elements that match (complement of vcmpnez).  */
4877   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4878                                              operands[2]));
4879   emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4880
4881   /* Create vector with ones in elements where there was a zero in one of
4882      the source elements or the elements did not match.  */
4883   emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4884   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4885   /* LE uses the trailing count, BE the leading count, of RESULT.  */
4886   if (<MODE>mode == V16QImode)
4887     {
4888       if (!BYTES_BIG_ENDIAN)
4889         emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4890       else
4891         emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4892     }
4893   else
4894     {
4895       rtx tmp = gen_reg_rtx (SImode);   /* Count before scaling by SH.  */
4896       if (!BYTES_BIG_ENDIAN)
4897         emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4898       else
4899         emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4900       emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4901     }
4902   DONE;
4903 })
4904
4905 ;; Load VSX Vector with Length.  Operand 2 is first shifted left by 56 bits.
4906 (define_expand "lxvl"
4907   [(set (match_dup 3)                   ;; operand 3: scratch made below
4908         (ashift:DI (match_operand:DI 2 "register_operand")
4909                    (const_int 56)))
4910    (set (match_operand:V16QI 0 "vsx_register_operand")
4911         (unspec:V16QI
4912          [(match_operand:DI 1 "gpc_reg_operand")        ;; address
4913           (mem:V16QI (match_dup 1))
4914           (match_dup 3)]                ;; shifted operand 2
4915          UNSPEC_LXVL))]
4916   "TARGET_P9_VECTOR && TARGET_64BIT"
4917 {
4918   operands[3] = gen_reg_rtx (DImode);   /* Fresh pseudo for the shift result.  */
4919 })
4920
4921 (define_insn "*lxvl"                    ;; matches the second set emitted by "lxvl"
4922   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4923         (unspec:V16QI
4924          [(match_operand:DI 1 "gpc_reg_operand" "b")    ;; address
4925           (mem:V16QI (match_dup 1))
4926           (match_operand:DI 2 "register_operand" "r")]  ;; already-shifted value
4927          UNSPEC_LXVL))]
4928   "TARGET_P9_VECTOR && TARGET_64BIT"
4929   "lxvl %x0,%1,%2"
4930   [(set_attr "type" "vecload")])
4931
4932 (define_insn "lxvll"                    ;; used by the xl_len_r expander
4933   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4934         (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")      ;; address
4935                        (mem:V16QI (match_dup 1))
4936                        (match_operand:DI 2 "register_operand" "r")]
4937                       UNSPEC_LXVLL))]
4938   "TARGET_P9_VECTOR"            ;; NOTE(review): *lxvl also tests TARGET_64BIT - confirm
4939   "lxvll %x0,%1,%2"
4940   [(set_attr "type" "vecload")])
4941
4942 ;; Expand for builtin xl_len_r.  Operand 1 is the address, operand 2 the length.
4943 (define_expand "xl_len_r"
4944   [(match_operand:V16QI 0 "vsx_register_operand")
4945    (match_operand:DI 1 "register_operand")
4946    (match_operand:DI 2 "register_operand")]
4947   "TARGET_P9_VECTOR && TARGET_64BIT"
4948 {
4949   rtx shift_mask = gen_reg_rtx (V16QImode);     /* lvsl permute control.  */
4950   rtx rtx_vtmp = gen_reg_rtx (V16QImode);       /* Raw lxvll result.  */
4951   rtx tmp = gen_reg_rtx (DImode);               /* Operand 2 << 56.  */
4952
4953   emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4954   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4955   emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4956   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4957              shift_mask));
4958   DONE;
4959 })
4960
4961 (define_insn "stxvll"                   ;; used by the xst_len_r expander
4962   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))   ;; address
4963         (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4964                        (mem:V16QI (match_dup 1))
4965                        (match_operand:DI 2 "register_operand" "r")]
4966                       UNSPEC_STXVLL))]
4967   "TARGET_P9_VECTOR"            ;; NOTE(review): *stxvl also tests TARGET_64BIT - confirm
4968   "stxvll %x0,%1,%2"
4969   [(set_attr "type" "vecstore")])
4970
4971 ;; Store VSX Vector with Length.  Operand 2 is first shifted left by 56 bits.
4972 (define_expand "stxvl"
4973   [(set (match_dup 3)                   ;; operand 3: scratch made below
4974         (ashift:DI (match_operand:DI 2 "register_operand")
4975                    (const_int 56)))
4976    (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))      ;; address
4977         (unspec:V16QI
4978          [(match_operand:V16QI 0 "vsx_register_operand")        ;; vector to store
4979           (mem:V16QI (match_dup 1))
4980           (match_dup 3)]                ;; shifted operand 2
4981          UNSPEC_STXVL))]
4982   "TARGET_P9_VECTOR && TARGET_64BIT"
4983 {
4984   operands[3] = gen_reg_rtx (DImode);   /* Fresh pseudo for the shift result.  */
4985 })
4986
4987 (define_insn "*stxvl"                   ;; matches the second set emitted by "stxvl"
4988   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))   ;; address
4989         (unspec:V16QI
4990          [(match_operand:V16QI 0 "vsx_register_operand" "wa")   ;; vector to store
4991           (mem:V16QI (match_dup 1))
4992           (match_operand:DI 2 "register_operand" "r")]  ;; already-shifted value
4993          UNSPEC_STXVL))]
4994   "TARGET_P9_VECTOR && TARGET_64BIT"
4995   "stxvl %x0,%1,%2"
4996   [(set_attr "type" "vecstore")])
4997
4998 ;; Expand for builtin xst_len_r.  Operand 0 is the vector, 1 the address, 2 the length.
4999 (define_expand "xst_len_r"
5000   [(match_operand:V16QI 0 "vsx_register_operand")
5001    (match_operand:DI 1 "register_operand")
5002    (match_operand:DI 2 "register_operand")]
5003   "TARGET_P9_VECTOR && TARGET_64BIT"
5004 {
5005   rtx shift_mask = gen_reg_rtx (V16QImode);     /* lvsr permute control.  */
5006   rtx rtx_vtmp = gen_reg_rtx (V16QImode);       /* Permuted copy of operand 0.  */
5007   rtx tmp = gen_reg_rtx (DImode);               /* Operand 2 << 56.  */
5008
5009   emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5010   emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5011              shift_mask));
5012   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5013   emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5014   DONE;
5015 })
5016
5017 ;; Vector Compare Not Equal Byte (expressed in RTL as the NOT of an EQ compare)
5018 (define_insn "vcmpneb"
5019   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5020          (not:V16QI
5021            (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5022                      (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5023   "TARGET_P9_VECTOR"
5024   "vcmpneb %0,%1,%2"
5025   [(set_attr "type" "vecsimple")])
5026
5027 ;; Vector Compare Not Equal or Zero Byte (modelled as an unspec)
5028 (define_insn "vcmpnezb"
5029   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5030         (unspec:V16QI
5031          [(match_operand:V16QI 1 "altivec_register_operand" "v")
5032           (match_operand:V16QI 2 "altivec_register_operand" "v")]
5033          UNSPEC_VCMPNEZB))]
5034   "TARGET_P9_VECTOR"
5035   "vcmpnezb %0,%1,%2"
5036   [(set_attr "type" "vecsimple")])
5037
5038 ;; Vector Compare Not Equal Half Word (expressed in RTL as the NOT of an EQ compare)
5039 (define_insn "vcmpneh"
5040   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5041         (not:V8HI
5042           (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5043                    (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5044   "TARGET_P9_VECTOR"
5045   "vcmpneh %0,%1,%2"
5046   [(set_attr "type" "vecsimple")])
5047
5048 ;; Vector Compare Not Equal or Zero Half Word (modelled as an unspec)
5049 (define_insn "vcmpnezh"
5050   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5051         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5052                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
5053          UNSPEC_VCMPNEZH))]
5054   "TARGET_P9_VECTOR"
5055   "vcmpnezh %0,%1,%2"
5056   [(set_attr "type" "vecsimple")])
5057
5058 ;; Vector Compare Not Equal Word (expressed in RTL as the NOT of an EQ compare)
5059 (define_insn "vcmpnew"
5060   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5061         (not:V4SI
5062           (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5063                    (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5064   "TARGET_P9_VECTOR"
5065   "vcmpnew %0,%1,%2"
5066   [(set_attr "type" "vecsimple")])
5067
5068 ;; Vector Compare Not Equal or Zero Word (modelled as an unspec)
5069 (define_insn "vcmpnezw"
5070   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5071         (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5072                       (match_operand:V4SI 2 "altivec_register_operand" "v")]
5073          UNSPEC_VCMPNEZW))]
5074   "TARGET_P9_VECTOR"
5075   "vcmpnezw %0,%1,%2"
5076   [(set_attr "type" "vecsimple")])
5077
5078 ;; Vector Count Leading Zero Least-Significant Bits Byte.  The count is an SImode GPR value.
5079 (define_insn "vclzlsbb_<mode>"
5080   [(set (match_operand:SI 0 "register_operand" "=r")
5081         (unspec:SI
5082          [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5083          UNSPEC_VCLZLSBB))]
5084   "TARGET_P9_VECTOR"
5085   "vclzlsbb %0,%1"              ;; same mnemonic for every <mode>
5086   [(set_attr "type" "vecsimple")])
5087
5088 ;; Vector Count Trailing Zero Least-Significant Bits Byte.  The count is an SImode GPR value.
5089 (define_insn "vctzlsbb_<mode>"
5090   [(set (match_operand:SI 0 "register_operand" "=r")
5091         (unspec:SI
5092          [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5093          UNSPEC_VCTZLSBB))]
5094   "TARGET_P9_VECTOR"
5095   "vctzlsbb %0,%1"              ;; same mnemonic for every <mode>
5096   [(set_attr "type" "vecsimple")])
5097
5098 ;; Vector Extract Unsigned Byte Left-Indexed.  Result goes to an SImode GPR.
5099 (define_insn "vextublx"
5100   [(set (match_operand:SI 0 "register_operand" "=r")
5101         (unspec:SI
5102          [(match_operand:SI 1 "register_operand" "r")           ;; GPR input
5103           (match_operand:V16QI 2 "altivec_register_operand" "v")]       ;; source vector
5104          UNSPEC_VEXTUBLX))]
5105   "TARGET_P9_VECTOR"
5106   "vextublx %0,%1,%2"
5107   [(set_attr "type" "vecsimple")])
5108
5109 ;; Vector Extract Unsigned Byte Right-Indexed.  Result goes to an SImode GPR.
5110 (define_insn "vextubrx"
5111   [(set (match_operand:SI 0 "register_operand" "=r")
5112         (unspec:SI
5113          [(match_operand:SI 1 "register_operand" "r")           ;; GPR input
5114           (match_operand:V16QI 2 "altivec_register_operand" "v")]       ;; source vector
5115          UNSPEC_VEXTUBRX))]
5116   "TARGET_P9_VECTOR"
5117   "vextubrx %0,%1,%2"
5118   [(set_attr "type" "vecsimple")])
5119
5120 ;; Vector Extract Unsigned Half Word Left-Indexed.  Result goes to an SImode GPR.
5121 (define_insn "vextuhlx"
5122   [(set (match_operand:SI 0 "register_operand" "=r")
5123         (unspec:SI
5124          [(match_operand:SI 1 "register_operand" "r")           ;; GPR input
5125           (match_operand:V8HI 2 "altivec_register_operand" "v")]        ;; source vector
5126          UNSPEC_VEXTUHLX))]
5127   "TARGET_P9_VECTOR"
5128   "vextuhlx %0,%1,%2"
5129   [(set_attr "type" "vecsimple")])
5130
5131 ;; Vector Extract Unsigned Half Word Right-Indexed.  Result goes to an SImode GPR.
5132 (define_insn "vextuhrx"
5133   [(set (match_operand:SI 0 "register_operand" "=r")
5134         (unspec:SI
5135          [(match_operand:SI 1 "register_operand" "r")           ;; GPR input
5136           (match_operand:V8HI 2 "altivec_register_operand" "v")]        ;; source vector
5137          UNSPEC_VEXTUHRX))]
5138   "TARGET_P9_VECTOR"
5139   "vextuhrx %0,%1,%2"
5140   [(set_attr "type" "vecsimple")])
5141
5142 ;; Vector Extract Unsigned Word Left-Indexed.  Result goes to an SImode GPR.
5143 (define_insn "vextuwlx"
5144   [(set (match_operand:SI 0 "register_operand" "=r")
5145         (unspec:SI
5146          [(match_operand:SI 1 "register_operand" "r")           ;; GPR input
5147           (match_operand:V4SI 2 "altivec_register_operand" "v")]        ;; source vector
5148          UNSPEC_VEXTUWLX))]
5149   "TARGET_P9_VECTOR"
5150   "vextuwlx %0,%1,%2"
5151   [(set_attr "type" "vecsimple")])
5152
5153 ;; Vector Extract Unsigned Word Right-Indexed.  Result goes to an SImode GPR.
5154 (define_insn "vextuwrx"
5155   [(set (match_operand:SI 0 "register_operand" "=r")
5156         (unspec:SI
5157          [(match_operand:SI 1 "register_operand" "r")           ;; GPR input
5158           (match_operand:V4SI 2 "altivec_register_operand" "v")]        ;; source vector
5159          UNSPEC_VEXTUWRX))]
5160   "TARGET_P9_VECTOR"
5161   "vextuwrx %0,%1,%2"
5162   [(set_attr "type" "vecsimple")])
5163
5164 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
5165 ;; endian version needs to adjust the byte number, and the V4SI element in
5166 ;; insert4b.
5167 (define_insn "extract4b"
5168   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5169        (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5170                      (match_operand:QI 2 "const_0_to_12_operand" "n")]
5171                     UNSPEC_XXEXTRACTUW))]
5172   "TARGET_P9_VECTOR"
5173 {
5174   if (!BYTES_BIG_ENDIAN)        /* Mirror the byte number for little endian.  */
5175     operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5176
5177   return "xxextractuw %x0,%x1,%2";
5178 })
5179
5180 (define_expand "insert4b"               ;; emits the pattern matched by *insert4b_internal
5181   [(set (match_operand:V16QI 0 "vsx_register_operand")
5182         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5183                        (match_operand:V16QI 2 "vsx_register_operand")
5184                        (match_operand:QI 3 "const_0_to_12_operand")]    ;; byte number
5185                    UNSPEC_XXINSERTW))]
5186   "TARGET_P9_VECTOR"
5187 {
5188   if (!BYTES_BIG_ENDIAN)        /* Little endian: adjust operands 1 and 3.  */
5189     {
5190       rtx op1 = operands[1];
5191       rtx v4si_tmp = gen_reg_rtx (V4SImode);
5192       emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5193       operands[1] = v4si_tmp;   /* Use the permuted copy as the source.  */
5194       operands[3] = GEN_INT (12 - INTVAL (operands[3]));        /* Mirror the byte number.  */
5195     }
5196 })
5197
5198 (define_insn "*insert4b_internal"       ;; insn behind the insert4b expander
5199   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5200         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5201                        (match_operand:V16QI 2 "vsx_register_operand" "0")       ;; tied to operand 0
5202                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
5203                    UNSPEC_XXINSERTW))]
5204   "TARGET_P9_VECTOR"
5205   "xxinsertw %x0,%x1,%3"
5206   [(set_attr "type" "vecperm")])
5207
5208
5209 ;; Generate vector extract four float 32 values from left four elements
5210 ;; of eight element vector of float 16 values.
5211 (define_expand "vextract_fp_from_shorth"
5212   [(set (match_operand:V4SF 0 "register_operand" "=wa")
5213         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5214    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5215   "TARGET_P9_VECTOR"
5216 {
5217   int i;
5218   int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5219   int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5220
5221   rtx rvals[16];                        /* Permute selector bytes as rtx constants.  */
5222   rtx mask = gen_reg_rtx (V16QImode);   /* Permute control vector.  */
5223   rtx tmp = gen_reg_rtx (V16QImode);    /* Permuted input for the conversion.  */
5224   rtvec v;
5225   /* Pick the selector table that matches the target byte order.  */
5226   for (i = 0; i < 16; i++)
5227     if (!BYTES_BIG_ENDIAN)
5228       rvals[i] = GEN_INT (vals_le[i]);
5229     else
5230       rvals[i] = GEN_INT (vals_be[i]);
5231
5232   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5233      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5234      src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5235      conversion instruction.  */
5236   v = gen_rtvec_v (16, rvals);
5237   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5238   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5239                                           operands[1], mask));
5240   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5241   DONE;
5242 })
5243
5244 ;; Generate vector extract four float 32 values from right four elements
5245 ;; of eight element vector of float 16 values.
5246 (define_expand "vextract_fp_from_shortl"
5247   [(set (match_operand:V4SF 0 "register_operand" "=wa")
5248         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5249         UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5250   "TARGET_P9_VECTOR"
5251 {
5252   int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5253   int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5254
5255   int i;
5256   rtx rvals[16];                        /* Permute selector bytes as rtx constants.  */
5257   rtx mask = gen_reg_rtx (V16QImode);   /* Permute control vector.  */
5258   rtx tmp = gen_reg_rtx (V16QImode);    /* Permuted input for the conversion.  */
5259   rtvec v;
5260   /* Pick the selector table that matches the target byte order.  */
5261   for (i = 0; i < 16; i++)
5262     if (!BYTES_BIG_ENDIAN)
5263       rvals[i] = GEN_INT (vals_le[i]);
5264     else
5265       rvals[i] = GEN_INT (vals_be[i]);
5266
5267   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5268      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5269      src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5270      conversion instruction.  */
5271   v = gen_rtvec_v (16, rvals);
5272   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5273   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5274                                           operands[1], mask));
5275   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5276   DONE;
5277 })
5278
5279 ;; Support for ISA 3.0 vector byte reverse
5280
5281 ;; Swap all bytes within a vector (byte swap of the single 128-bit element)
5282 (define_insn "p9_xxbrq_v1ti"
5283   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5284         (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5285   "TARGET_P9_VECTOR"
5286   "xxbrq %x0,%x1"
5287   [(set_attr "type" "vecperm")])
5288
5289 (define_expand "p9_xxbrq_v16qi"         ;; V16QI wrapper around p9_xxbrq_v1ti
5290   [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5291    (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5292   "TARGET_P9_VECTOR"
5293 {
5294   rtx op0 = gen_reg_rtx (V1TImode);             /* V1TI temporary result.  */
5295   rtx op1 = gen_lowpart (V1TImode, operands[1]);        /* Input viewed as V1TI.  */
5296   emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5297   emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5298   DONE;
5299 })
5300
5301 ;; Swap all bytes in each 64-bit element (V2DI byte swap)
5302 (define_insn "p9_xxbrd_v2di"
5303   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5304         (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5305   "TARGET_P9_VECTOR"
5306   "xxbrd %x0,%x1"
5307   [(set_attr "type" "vecperm")])
5308
5309 (define_expand "p9_xxbrd_v2df"          ;; V2DF wrapper around p9_xxbrd_v2di
5310   [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5311    (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5312   "TARGET_P9_VECTOR"
5313 {
5314   rtx op0 = gen_reg_rtx (V2DImode);             /* V2DI temporary result.  */
5315   rtx op1 = gen_lowpart (V2DImode, operands[1]);        /* Input viewed as V2DI.  */
5316   emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5317   emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5318   DONE;
5319 })
5320
5321 ;; Swap all bytes in each 32-bit element (V4SI byte swap)
5322 (define_insn "p9_xxbrw_v4si"
5323   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5324         (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5325   "TARGET_P9_VECTOR"
5326   "xxbrw %x0,%x1"
5327   [(set_attr "type" "vecperm")])
5328
5329 (define_expand "p9_xxbrw_v4sf"          ;; V4SF wrapper around p9_xxbrw_v4si
5330   [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5331    (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5332   "TARGET_P9_VECTOR"
5333 {
5334   rtx op0 = gen_reg_rtx (V4SImode);             /* V4SI temporary result.  */
5335   rtx op1 = gen_lowpart (V4SImode, operands[1]);        /* Input viewed as V4SI.  */
5336   emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5337   emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5338   DONE;
5339 })
5340
5341 ;; Swap all bytes in each element of vector.  Falls back to vperm without P9 vector.
5342 (define_expand "revb_<mode>"
5343   [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5344    (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5345   ""
5346 {
5347   if (TARGET_P9_VECTOR)         /* Use the matching p9_xxbr* pattern.  */
5348     emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5349   else
5350     {
5351       /* Want to have the elements in reverse order relative
5352          to the endian mode in use, i.e. in LE mode, put elements
5353          in BE order.  */
5354       rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5355       emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5356                                            operands[1], sel));
5357     }
5358
5359   DONE;
5360 })
5361
5362 ;; Reversing bytes in vector char is just a NOP, so this expands to a plain move.
5363 (define_expand "revb_v16qi"
5364   [(set (match_operand:V16QI 0 "vsx_register_operand")
5365         (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5366   ""
5367 {
5368   emit_move_insn (operands[0], operands[1]);    /* Copy instead of the bswap set.  */
5369   DONE;
5370 })
5371
5372 ;; Swap all bytes in each 16-bit element (V8HI byte swap)
5373 (define_insn "p9_xxbrh_v8hi"
5374   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5375         (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5376   "TARGET_P9_VECTOR"
5377   "xxbrh %x0,%x1"
5378   [(set_attr "type" "vecperm")])
5379 \f
5380
5381 ;; Operand numbers for the following peephole2
5382 (define_constants
5383   [(SFBOOL_TMP_GPR               0)             ;; GPR temporary
5384    (SFBOOL_TMP_VSX               1)             ;; vector temporary
5385    (SFBOOL_MFVSR_D               2)             ;; move to gpr dest
5386    (SFBOOL_MFVSR_A               3)             ;; move to gpr src
5387    (SFBOOL_BOOL_D                4)             ;; and/ior/xor dest
5388    (SFBOOL_BOOL_A1               5)             ;; and/ior/xor arg1
5389    (SFBOOL_BOOL_A2               6)             ;; and/ior/xor arg2
5390    (SFBOOL_SHL_D                 7)             ;; shift left dest
5391    (SFBOOL_SHL_A                 8)             ;; shift left arg
5392    (SFBOOL_MTVSR_D               9)             ;; move to vector dest
5393    (SFBOOL_MFVSR_A_V4SF         10)             ;; SFBOOL_MFVSR_A as V4SFmode
5394    (SFBOOL_BOOL_A_DI            11)             ;; SFBOOL_BOOL_A1/A2 as DImode
5395    (SFBOOL_TMP_VSX_DI           12)             ;; SFBOOL_TMP_VSX as DImode
5396    (SFBOOL_MTVSR_D_V4SF         13)])           ;; SFBOOL_MTVSR_D as V4SFmode
5397
5398 ;; Attempt to optimize some common GLIBC operations using logical operations to
5399 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
5400 ;; after macro expansion that looks like:
5401 ;;
5402 ;;      typedef union {
5403 ;;        float value;
5404 ;;        uint32_t word;
5405 ;;      } ieee_float_shape_type;
5406 ;;
5407 ;;      float t1;
5408 ;;      int32_t is;
5409 ;;
5410 ;;      do {
5411 ;;        ieee_float_shape_type gf_u;
5412 ;;        gf_u.value = (t1);
5413 ;;        (is) = gf_u.word;
5414 ;;      } while (0);
5415 ;;
5416 ;;      do {
5417 ;;        ieee_float_shape_type sf_u;
5418 ;;        sf_u.word = (is & 0xfffff000);
5419 ;;        (t1) = sf_u.value;
5420 ;;      } while (0);
5421 ;;
5422 ;;
5423 ;; This would result in two direct move operations (convert to memory format,
5424 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5425 ;; scalar format).  With this peephole, we eliminate the direct move to the
5426 ;; GPR, and instead move the integer mask value to the vector register after a
5427 ;; shift and do the VSX logical operation.
5428
5429 ;; The insns for dealing with SFmode in GPR registers look like:
5430 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5431 ;;
5432 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5433 ;;
5434 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5435 ;;
5436 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5437 ;;
5438 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5439 ;;
5440 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5441
5442 (define_peephole2               ;; SFmode and/ior/xor done in VSX instead of GPRs
5443   [(match_scratch:DI SFBOOL_TMP_GPR "r")
5444    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5445
5446    ;; MFVSRWZ (aka zero_extend)
5447    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5448         (zero_extend:DI
5449          (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5450
5451    ;; AND/IOR/XOR operation on int
5452    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5453         (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5454                         (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5455
5456    ;; SLDI
5457    (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5458         (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5459                    (const_int 32)))
5460
5461    ;; MTVSRD
5462    (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5463         (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5464
5465   "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5466    /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
5467       to compare registers, when the mode is different.  */
5468    && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5469    && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5470    && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
5471    && (REG_P (operands[SFBOOL_BOOL_A2])
5472        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5473    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5474        || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5475    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5476        || (REG_P (operands[SFBOOL_BOOL_A2])
5477            && REGNO (operands[SFBOOL_MFVSR_D])
5478                 == REGNO (operands[SFBOOL_BOOL_A2])))
5479    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5480    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5481        || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5482    && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5483   [(set (match_dup SFBOOL_TMP_GPR)              ;; shift the other boolean input
5484         (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5485                    (const_int 32)))
5486
5487    (set (match_dup SFBOOL_TMP_VSX_DI)           ;; copy it to the vector scratch
5488         (match_dup SFBOOL_TMP_GPR))
5489
5490    (set (match_dup SFBOOL_MTVSR_D_V4SF)         ;; boolean operation in V4SFmode
5491         (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5492                           (match_dup SFBOOL_TMP_VSX)))]
5493 {
5494   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5495   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5496   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5497   int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5498   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5499   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5500   /* Choose the DImode value that gets shifted into the scratch GPR.  */
5501   if (CONST_INT_P (bool_a2))
5502     {
5503       rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5504       emit_move_insn (tmp_gpr, bool_a2);        /* Load the constant first.  */
5505       operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5506     }
5507   else
5508     {   /* Use whichever boolean input is not the MFVSR destination.  */
5509       int regno_bool_a1 = REGNO (bool_a1);
5510       int regno_bool_a2 = REGNO (bool_a2);
5511       int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5512                           ? regno_bool_a2 : regno_bool_a1);
5513       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5514     }
5515   /* Re-create the registers in the modes the replacement insns use.  */
5516   operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5517   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5518   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5519 })