gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Iterator for comparison types
  22 (define_code_iterator CMP_TEST [eq lt gt unordered])
  23
  24 ;; Mode attribute for vector floate and floato conversions
  25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
  26
  27 ;; Iterator for both scalar and vector floating point types supported by VSX
  28 (define_mode_iterator VSX_B [DF V4SF V2DF])
  29
  30 ;; Iterator for the 2 64-bit vector types
  31 (define_mode_iterator VSX_D [V2DF V2DI])
  32
  33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
  34 ;; types that go in a single vector register.
  35 (define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
  36                                   (TF   "FLOAT128_VECTOR_P (TFmode)")
  37                                   TI
  38                                   V1TI])
  39
  40 ;; Iterator for 128-bit integer types that go in a single vector register.
  41 (define_mode_iterator VSX_TI [TI V1TI])
  42
  43 ;; Iterator for the 2 32-bit vector types
  44 (define_mode_iterator VSX_W [V4SF V4SI])
  45
  46 ;; Iterator for the DF types
  47 (define_mode_iterator VSX_DF [V2DF DF])
  48
  49 ;; Iterator for vector floating point types supported by VSX
  50 (define_mode_iterator VSX_F [V4SF V2DF])
  51
  52 ;; Iterator for logical types supported by VSX
  53 (define_mode_iterator VSX_L [V16QI
  54                              V8HI
  55                              V4SI
  56                              V2DI
  57                              V4SF
  58                              V2DF
  59                              V1TI
  60                              TI
  61                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  62                              (TF        "FLOAT128_VECTOR_P (TFmode)")])
  63
  64 ;; Iterator for memory moves.
  65 (define_mode_iterator VSX_M [V16QI
  66                              V8HI
  67                              V4SI
  68                              V2DI
  69                              V4SF
  70                              V2DF
  71                              V1TI
  72                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  73                              (TF        "FLOAT128_VECTOR_P (TFmode)")
  74                              TI])
  75
  76 (define_mode_attr VSX_XXBR  [(V8HI  "h")  ; Maps each mode to a one-letter suffix (h, w, d or q).
  77                              (V4SI  "w")  ; The modes listed are the same ones as in the VEC_REVB iterator.
  78                              (V4SF  "w")  ; NOTE(review): presumably the suffix of the xxbr* byte-reverse
  79                              (V2DF  "d")  ; mnemonics -- confirm against the patterns that use <VSX_XXBR>.
  80                              (V2DI  "d")
  81                              (V1TI  "q")])
  82
  83 ;; Map into the appropriate load/store name based on the type
  84 (define_mode_attr VSm  [(V16QI "vw4")
  85                         (V8HI  "vw4")
  86                         (V4SI  "vw4")
  87                         (V4SF  "vw4")
  88                         (V2DF  "vd2")
  89                         (V2DI  "vd2")
  90                         (DF    "d")
  91                         (TF    "vd2")
  92                         (KF    "vd2")
  93                         (V1TI  "vd2")
  94                         (TI    "vd2")])
  95
  96 ;; Map into the appropriate suffix based on the type
  97 (define_mode_attr VSs   [(V16QI "sp")
  98                          (V8HI  "sp")
  99                          (V4SI  "sp")
 100                          (V4SF  "sp")
 101                          (V2DF  "dp")
 102                          (V2DI  "dp")
 103                          (DF    "dp")
 104                          (SF    "sp")
 105                          (TF    "dp")
 106                          (KF    "dp")
 107                          (V1TI  "dp")
 108                          (TI    "dp")])
 109
 110 ;; Map the register class used
 111 (define_mode_attr VSr   [(V16QI "v")
 112                          (V8HI  "v")
 113                          (V4SI  "v")
 114                          (V4SF  "wf")
 115                          (V2DI  "wd")
 116                          (V2DF  "wd")
 117                          (DI    "wi")
 118                          (DF    "ws")
 119                          (SF    "ww")
 120                          (TF    "wp")
 121                          (KF    "wq")
 122                          (V1TI  "v")
 123                          (TI    "wt")])
 124
 125 ;; Map the register class used for float<->int conversions (floating point side)
 126 ;; VSr2 is the preferred register class, VSr3 is any register class that will
 127 ;; hold the data
 128 (define_mode_attr VSr2  [(V2DF  "wd")
 129                          (V4SF  "wf")
 130                          (DF    "ws")
 131                          (SF    "ww")
 132                          (DI    "wi")
 133                          (KF    "wq")
 134                          (TF    "wp")])
 135
 136 (define_mode_attr VSr3  [(V2DF  "wa")
 137                          (V4SF  "wa")
 138                          (DF    "ws")
 139                          (SF    "ww")
 140                          (DI    "wi")
 141                          (KF    "wq")
 142                          (TF    "wp")])
 143
 144 ;; Map the register class for sp<->dp float conversions, destination
 145 (define_mode_attr VSr4  [(SF    "ws")
 146                          (DF    "f")
 147                          (V2DF  "wd")
 148                          (V4SF  "v")])
 149
 150 ;; Map the register class for sp<->dp float conversions, source
 151 (define_mode_attr VSr5  [(SF    "ws")
 152                          (DF    "f")
 153                          (V2DF  "v")
 154                          (V4SF  "wd")])
 155
 156 ;; The VSX register class that a type can occupy, even if it is not the
 157 ;; preferred register class (VSr is the preferred register class that will get
 158 ;; allocated first).
 159 (define_mode_attr VSa   [(V16QI "wa")
 160                          (V8HI  "wa")
 161                          (V4SI  "wa")
 162                          (V4SF  "wa")
 163                          (V2DI  "wa")
 164                          (V2DF  "wa")
 165                          (DI    "wi")
 166                          (DF    "ws")
 167                          (SF    "ww")
 168                          (V1TI  "wa")
 169                          (TI    "wt")
 170                          (TF    "wp")
 171                          (KF    "wq")])
 172
 173 ;; A mode attribute to disparage use of GPR registers, except for scalar
 174 ;; integer modes.
 175 (define_mode_attr ??r   [(V16QI "??r")
 176                          (V8HI  "??r")
 177                          (V4SI  "??r")
 178                          (V4SF  "??r")
 179                          (V2DI  "??r")
 180                          (V2DF  "??r")
 181                          (V1TI  "??r")
 182                          (KF    "??r")
 183                          (TF    "??r")
 184                          (TI    "r")])
 185
 186 ;; Same size integer type for floating point data
 187 (define_mode_attr VSi [(V4SF  "v4si")
 188                        (V2DF  "v2di")
 189                        (DF    "di")])
 190
 191 (define_mode_attr VSI [(V4SF  "V4SI")  ; Upper-case spelling of the VSi mapping (same-size integer mode).
 192                        (V2DF  "V2DI")
 193                        (DF    "DI")])
 194
 195 ;; Word size for same size conversion
 196 (define_mode_attr VSc [(V4SF "w")
 197                        (V2DF "d")
 198                        (DF   "d")])
 199
 200 ;; Map into either s or v, depending on whether this is a scalar or vector
 201 ;; operation
 202 (define_mode_attr VSv   [(V16QI "v")
 203                          (V8HI  "v")
 204                          (V4SI  "v")
 205                          (V4SF  "v")
 206                          (V2DI  "v")
 207                          (V2DF  "v")
 208                          (V1TI  "v")
 209                          (DF    "s")
 210                          (KF    "v")])
 211
 212 ;; Appropriate type for add ops (and other simple FP ops)
 213 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 214                                  (V4SF "vecfloat")
 215                                  (DF   "fp")])
 216
 217 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 218                                    (V4SF "fp_addsub_s")
 219                                    (DF   "fp_addsub_d")])
 220
 221 ;; Appropriate type for multiply ops
 222 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 223                                  (V4SF "vecfloat")
 224                                  (DF   "dmul")])
 225
 226 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 227                                  (V4SF "fp_mul_s")
 228                                  (DF   "fp_mul_d")])
 229
 230 ;; Appropriate type for divide ops.
 231 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 232                                  (V4SF "vecfdiv")
 233                                  (DF   "ddiv")])
 234
 235 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 236                                  (V4SF "fp_div_s")
 237                                  (DF   "fp_div_d")])
 238
 239 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 240 ;; the scalar sqrt
 241 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 242                                  (V4SF "ssqrt")
 243                                  (DF   "dsqrt")])
 244
 245 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 246                                  (V4SF "fp_sqrt_s")
 247                                  (DF   "fp_sqrt_d")])
 248
 249 ;; Iterator and modes for sp<->dp conversions
 250 ;; Because scalar SF values are represented internally as double, use the
 251 ;; V4SF type to represent this rather than SF.
 252 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 253
 254 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 255                                (V4SF    "V2DF")
 256                                (V2DF    "V4SF")])
 257
 258 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 259                                 (V4SF   "xvcvspdp")
 260                                 (V2DF   "xvcvdpsp")])
 261
 262 (define_mode_attr VS_spdp_type [(DF     "fp")
 263                                 (V4SF   "vecdouble")
 264                                 (V2DF   "vecdouble")])
 265
 266 ;; Map the scalar mode for a vector type
 267 (define_mode_attr VS_scalar [(V1TI      "TI")
 268                              (V2DF      "DF")
 269                              (V2DI      "DI")
 270                              (V4SF      "SF")
 271                              (V4SI      "SI")
 272                              (V8HI      "HI")
 273                              (V16QI     "QI")])
 274
 275 ;; Map to a double-sized vector mode
 276 (define_mode_attr VS_double [(V4SI      "V8SI")
 277                              (V4SF      "V8SF")
 278                              (V2DI      "V4DI")
 279                              (V2DF      "V4DF")
 280                              (V1TI      "V2TI")])
 281
 282 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 283 ;; to/from gprs
 284 (define_mode_attr VS_64dm [(V2DF        "wk")
 285                            (V2DI        "wj")])
 286
 287 ;; Map register class for 64-bit element in 128-bit vector for normal register
 288 ;; to register moves
 289 (define_mode_attr VS_64reg [(V2DF       "ws")
 290                             (V2DI       "wi")])
 291
 292 ;; Iterators for loading constants with xxspltib
 293 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 294 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 295
 296 ;; Vector reverse byte modes
 297 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
 298
 299 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
 300 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
 301 ;; done on ISA 2.07 and not just ISA 3.0.
 302 (define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
 303 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
 304
 305 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")  ; One-letter suffix (b, h or w) for each VSX_EXTRACT_I mode.
 306                                      (V8HI "h")
 307                                      (V4SI "w")])
 308
 309 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
 310 ;; insert to validate the operand number.
 311 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
 312                                          (V8HI  "const_0_to_7_operand")
 313                                          (V4SI  "const_0_to_3_operand")])
 314
 315 ;; Mode attribute to give the constraint for vector extract and insert
 316 ;; operations.
 317 (define_mode_attr VSX_EX [(V16QI "v")
 318                           (V8HI  "v")
 319                           (V4SI  "wa")])
 320
 321 ;; Mode iterator for binary floating types other than double to
 322 ;; optimize convert to that floating point type from an extract
 323 ;; of an integer type
 324 (define_mode_iterator VSX_EXTRACT_FL [SF
 325                                       (IF "FLOAT128_2REG_P (IFmode)")
 326                                       (KF "TARGET_FLOAT128_HW")
 327                                       (TF "FLOAT128_2REG_P (TFmode)
 328                                            || (FLOAT128_IEEE_P (TFmode)
 329                                                && TARGET_FLOAT128_HW)")])
 330
 331 ;; Mode iterator for binary floating types that have a direct conversion
 332 ;; from 64-bit integer to floating point
 333 (define_mode_iterator FL_CONV [SF
 334                                DF
 335                                (KF "TARGET_FLOAT128_HW")
 336                                (TF "TARGET_FLOAT128_HW
 337                                     && FLOAT128_IEEE_P (TFmode)")])
 338
 339 ;; Iterator for the 2 short vector types to do a splat from an integer
 340 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 341
 342 ;; Mode attribute to give the count for the splat instruction to splat
 343 ;; the value in the 64-bit integer slot
 344 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
 345
 346 ;; Mode attribute to give the suffix for the splat instruction
 347 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
 348
 349 ;; Constants for creating unspecs
 350 (define_c_enum "unspec"
 351   [UNSPEC_VSX_CONCAT
 352    UNSPEC_VSX_CVDPSXWS
 353    UNSPEC_VSX_CVDPUXWS
 354    UNSPEC_VSX_CVSPDP
 355    UNSPEC_VSX_CVHPSP
 356    UNSPEC_VSX_CVSPDPN
 357    UNSPEC_VSX_CVDPSPN
 358    UNSPEC_VSX_CVSXWDP
 359    UNSPEC_VSX_CVUXWDP
 360    UNSPEC_VSX_CVSXDSP
 361    UNSPEC_VSX_CVUXDSP
 362    UNSPEC_VSX_CVSPSXDS
 363    UNSPEC_VSX_CVSPUXDS
 364    UNSPEC_VSX_CVSXWSP
 365    UNSPEC_VSX_CVUXWSP
 366    UNSPEC_VSX_FLOAT2
 367    UNSPEC_VSX_UNS_FLOAT2
 368    UNSPEC_VSX_FLOATE
 369    UNSPEC_VSX_UNS_FLOATE
 370    UNSPEC_VSX_FLOATO
 371    UNSPEC_VSX_UNS_FLOATO
 372    UNSPEC_VSX_TDIV
 373    UNSPEC_VSX_TSQRT
 374    UNSPEC_VSX_SET
 375    UNSPEC_VSX_ROUND_I
 376    UNSPEC_VSX_ROUND_IC
 377    UNSPEC_VSX_SLDWI
 378    UNSPEC_VSX_XXPERM
 379
 380    UNSPEC_VSX_XXSPLTW
 381    UNSPEC_VSX_XXSPLTD
 382    UNSPEC_VSX_DIVSD
 383    UNSPEC_VSX_DIVUD
 384    UNSPEC_VSX_MULSD
 385    UNSPEC_VSX_XVCVSXDDP
 386    UNSPEC_VSX_XVCVUXDDP
 387    UNSPEC_VSX_XVCVDPSXDS
 388    UNSPEC_VSX_XVCDPSP
 389    UNSPEC_VSX_XVCVDPUXDS
 390    UNSPEC_VSX_SIGN_EXTEND
 391    UNSPEC_VSX_XVCVSPSXWS
 392    UNSPEC_VSX_XVCVSPSXDS
 393    UNSPEC_VSX_VSLO
 394    UNSPEC_VSX_EXTRACT
 395    UNSPEC_VSX_SXEXPDP
 396    UNSPEC_VSX_SXSIG
 397    UNSPEC_VSX_SIEXPDP
 398    UNSPEC_VSX_SIEXPQP
 399    UNSPEC_VSX_SCMPEXPDP
 400    UNSPEC_VSX_STSTDC
 401    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
 402    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
 403    UNSPEC_VSX_VXEXP
 404    UNSPEC_VSX_VXSIG
 405    UNSPEC_VSX_VIEXP
 406    UNSPEC_VSX_VTSTDC
 407    UNSPEC_VSX_VEC_INIT
 408    UNSPEC_VSX_VSIGNED2
 409
 410    UNSPEC_LXVL
 411    UNSPEC_LXVLL
 412    UNSPEC_LVSL_REG
 413    UNSPEC_LVSR_REG
 414    UNSPEC_STXVL
 415    UNSPEC_STXVLL
 416    UNSPEC_XL_LEN_R
 417    UNSPEC_XST_LEN_R
 418
 419    UNSPEC_VCLZLSBB
 420    UNSPEC_VCTZLSBB
 421    UNSPEC_VEXTUBLX
 422    UNSPEC_VEXTUHLX
 423    UNSPEC_VEXTUWLX
 424    UNSPEC_VEXTUBRX
 425    UNSPEC_VEXTUHRX
 426    UNSPEC_VEXTUWRX
 427    UNSPEC_VCMPNEB
 428    UNSPEC_VCMPNEZB
 429    UNSPEC_VCMPNEH
 430    UNSPEC_VCMPNEZH
 431    UNSPEC_VCMPNEW
 432    UNSPEC_VCMPNEZW
 433    UNSPEC_XXEXTRACTUW
 434    UNSPEC_XXINSERTW
 435    UNSPEC_VSX_FIRST_MATCH_INDEX
 436    UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
 437    UNSPEC_VSX_FIRST_MISMATCH_INDEX
 438    UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
 439   ])
 440
 441 ;; VSX moves
 442
 443 ;; The patterns for LE permuted loads and stores come before the general
 444 ;; VSX moves so they match first.
 445 (define_insn_and_split "*vsx_le_perm_load_<mode>"  ; Little-endian load of a VSX_D (V2DF/V2DI) vector from memory.
 446   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
 447         (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
 448   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 449   "#"  ; No assembly template: the insn is expected to be split.
 450   "&& 1"  ; Split condition adds nothing to the insn condition.
 451   [(set (match_dup 2)  ; operands[2] = memory with its two elements swapped.
 452         (vec_select:<MODE>
 453           (match_dup 1)
 454           (parallel [(const_int 1) (const_int 0)])))
 455    (set (match_dup 0)  ; operands[0] = operands[2] with the elements swapped back.
 456         (vec_select:<MODE>
 457           (match_dup 2)
 458           (parallel [(const_int 1) (const_int 0)])))]
 459 {
 460   rtx mem = operands[1];  /* The memory source being loaded.  */
 461
 462   /* Don't apply the swap optimization if we've already performed register
 463      allocation and the hard register destination is not in the altivec
 464      range.  */
 465   if ((MEM_ALIGN (mem) >= 128)  /* MEM_ALIGN is in bits: needs 16-byte alignment.  */
 466       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 467           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 468     {
 469       rtx mem_address = XEXP (mem, 0);
 470       enum machine_mode mode = GET_MODE (mem);
 471
 472       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 473         {
 474           /* Replace the source memory address with masked address.  */
 475           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 476           emit_insn (lvx_set_expr);
 477           DONE;  /* The emitted lvx set replaces the two-swap template.  */
 478         }
 479       else if (rs6000_quadword_masked_address_p (mem_address))
 480         {
 481           /* This rtl is already in the form that matches lvx
 482              instruction, so leave it alone.  */
 483           DONE;  /* Nothing is emitted on this path.  */
 484         }
 485       /* Otherwise, fall through to transform into a swapping load.  */
 486     }
 487   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])  /* Temporary for the swapped value, */
 488                                        : operands[0];  /* or the destination itself when no pseudo can be created.  */
 489 }
 490   [(set_attr "type" "vecload")
 491    (set_attr "length" "8")])
 492
 493 (define_insn_and_split "*vsx_le_perm_load_<mode>"  ; Little-endian load of a VSX_W (V4SF/V4SI) vector from memory.
 494   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 495         (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
 496   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 497   "#"  ; No assembly template: the insn is expected to be split.
 498   "&& 1"  ; Split condition adds nothing to the insn condition.
 499   [(set (match_dup 2)  ; operands[2] = memory with its two 2-element halves swapped.
 500         (vec_select:<MODE>
 501           (match_dup 1)
 502           (parallel [(const_int 2) (const_int 3)
 503                      (const_int 0) (const_int 1)])))
 504    (set (match_dup 0)  ; operands[0] = operands[2] with the halves swapped back.
 505         (vec_select:<MODE>
 506           (match_dup 2)
 507           (parallel [(const_int 2) (const_int 3)
 508                      (const_int 0) (const_int 1)])))]
 509 {
 510   rtx mem = operands[1];  /* The memory source being loaded.  */
 511
 512   /* Don't apply the swap optimization if we've already performed register
 513      allocation and the hard register destination is not in the altivec
 514      range.  operands[0] may be a SUBREG, so use reg_or_subregno, not REGNO.  */
 515   if ((MEM_ALIGN (mem) >= 128)  /* MEM_ALIGN is in bits: needs 16-byte alignment.  */
 516       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 517           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 518     {
 519       rtx mem_address = XEXP (mem, 0);
 520       enum machine_mode mode = GET_MODE (mem);
 521
 522       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 523         {
 524           /* Replace the source memory address with masked address.  */
 525           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 526           emit_insn (lvx_set_expr);
 527           DONE;  /* The emitted lvx set replaces the two-swap template.  */
 528         }
 529       else if (rs6000_quadword_masked_address_p (mem_address))
 530         {
 531           /* This rtl is already in the form that matches lvx
 532              instruction, so leave it alone.  */
 533           DONE;  /* Nothing is emitted on this path.  */
 534         }
 535       /* Otherwise, fall through to transform into a swapping load.  */
 536     }
 537   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])  /* Temporary for the swapped value, */
 538                                        : operands[0];  /* or the destination itself when no pseudo can be created.  */
 539 }
 540   [(set_attr "type" "vecload")
 541    (set_attr "length" "8")])
 542
 543 (define_insn_and_split "*vsx_le_perm_load_v8hi"  ; Little-endian load of a V8HI vector from memory.
 544   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 545         (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
 546   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 547   "#"  ; No assembly template: the insn is expected to be split.
 548   "&& 1"  ; Split condition adds nothing to the insn condition.
 549   [(set (match_dup 2)  ; operands[2] = memory with its two 4-element halves swapped.
 550         (vec_select:V8HI
 551           (match_dup 1)
 552           (parallel [(const_int 4) (const_int 5)
 553                      (const_int 6) (const_int 7)
 554                      (const_int 0) (const_int 1)
 555                      (const_int 2) (const_int 3)])))
 556    (set (match_dup 0)  ; operands[0] = operands[2] with the halves swapped back.
 557         (vec_select:V8HI
 558           (match_dup 2)
 559           (parallel [(const_int 4) (const_int 5)
 560                      (const_int 6) (const_int 7)
 561                      (const_int 0) (const_int 1)
 562                      (const_int 2) (const_int 3)])))]
 563 {
 564   rtx mem = operands[1];  /* The memory source being loaded.  */
 565
 566   /* Don't apply the swap optimization if we've already performed register
 567      allocation and the hard register destination is not in the altivec
 568      range.  operands[0] may be a SUBREG, so use reg_or_subregno, not REGNO.  */
 569   if ((MEM_ALIGN (mem) >= 128)  /* MEM_ALIGN is in bits: needs 16-byte alignment.  */
 570       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 571           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 572     {
 573       rtx mem_address = XEXP (mem, 0);
 574       enum machine_mode mode = GET_MODE (mem);
 575
 576       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 577         {
 578           /* Replace the source memory address with masked address.  */
 579           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 580           emit_insn (lvx_set_expr);
 581           DONE;  /* The emitted lvx set replaces the two-swap template.  */
 582         }
 583       else if (rs6000_quadword_masked_address_p (mem_address))
 584         {
 585           /* This rtl is already in the form that matches lvx
 586              instruction, so leave it alone.  */
 587           DONE;  /* Nothing is emitted on this path.  */
 588         }
 589       /* Otherwise, fall through to transform into a swapping load.  */
 590     }
 591   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])  /* Temporary for the swapped value, */
 592                                        : operands[0];  /* or the destination itself when no pseudo can be created.  */
 593 }
 594   [(set_attr "type" "vecload")
 595    (set_attr "length" "8")])
 596
 597 (define_insn_and_split "*vsx_le_perm_load_v16qi"  ; Little-endian load of a V16QI vector from memory.
 598   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 599         (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
 600   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 601   "#"  ; No assembly template: the insn is expected to be split.
 602   "&& 1"  ; Split condition adds nothing to the insn condition.
 603   [(set (match_dup 2)  ; operands[2] = memory with its two 8-element halves swapped.
 604         (vec_select:V16QI
 605           (match_dup 1)
 606           (parallel [(const_int 8) (const_int 9)
 607                      (const_int 10) (const_int 11)
 608                      (const_int 12) (const_int 13)
 609                      (const_int 14) (const_int 15)
 610                      (const_int 0) (const_int 1)
 611                      (const_int 2) (const_int 3)
 612                      (const_int 4) (const_int 5)
 613                      (const_int 6) (const_int 7)])))
 614    (set (match_dup 0)  ; operands[0] = operands[2] with the halves swapped back.
 615         (vec_select:V16QI
 616           (match_dup 2)
 617           (parallel [(const_int 8) (const_int 9)
 618                      (const_int 10) (const_int 11)
 619                      (const_int 12) (const_int 13)
 620                      (const_int 14) (const_int 15)
 621                      (const_int 0) (const_int 1)
 622                      (const_int 2) (const_int 3)
 623                      (const_int 4) (const_int 5)
 624                      (const_int 6) (const_int 7)])))]
 625 {
 626   rtx mem = operands[1];  /* The memory source being loaded.  */
 627
 628   /* Don't apply the swap optimization if we've already performed register
 629      allocation and the hard register destination is not in the altivec
 630      range.  operands[0] may be a SUBREG, so use reg_or_subregno, not REGNO.  */
 631   if ((MEM_ALIGN (mem) >= 128)  /* MEM_ALIGN is in bits: needs 16-byte alignment.  */
 632       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 633           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 634     {
 635       rtx mem_address = XEXP (mem, 0);
 636       enum machine_mode mode = GET_MODE (mem);
 637
 638       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 639         {
 640           /* Replace the source memory address with masked address.  */
 641           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 642           emit_insn (lvx_set_expr);
 643           DONE;  /* The emitted lvx set replaces the two-swap template.  */
 644         }
 645       else if (rs6000_quadword_masked_address_p (mem_address))
 646         {
 647           /* This rtl is already in the form that matches lvx
 648              instruction, so leave it alone.  */
 649           DONE;  /* Nothing is emitted on this path.  */
 650         }
 651       /* Otherwise, fall through to transform into a swapping load.  */
 652     }
 653   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])  /* Temporary for the swapped value, */
 654                                        : operands[0];  /* or the destination itself when no pseudo can be created.  */
 655 }
 656   [(set_attr "type" "vecload")
 657    (set_attr "length" "8")])
 658
 659 (define_insn "*vsx_le_perm_store_<mode>"  ; Little-endian store of a VSX_D (V2DF/V2DI) vector to memory.
 660   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
 661         (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]  ; "+": the post-reload split below rewrites operand 1 in place.
 662   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 663   "#"  ; No assembly template: the define_splits that follow do the work.
 664   [(set_attr "type" "vecstore")
 665    (set_attr "length" "12")])
 666
 667 (define_split  ; Pre-reload split of a VSX_D store (condition ends in !reload_completed).
 668   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
 669         (match_operand:VSX_D 1 "vsx_register_operand"))]
 670   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 671   [(set (match_dup 2)  ; operands[2] = source register with its two elements swapped.
 672         (vec_select:<MODE>
 673           (match_dup 1)
 674           (parallel [(const_int 1) (const_int 0)])))
 675    (set (match_dup 0)  ; memory = operands[2] with the elements swapped again.
 676         (vec_select:<MODE>
 677           (match_dup 2)
 678           (parallel [(const_int 1) (const_int 0)])))]
 679 {
 680   rtx mem = operands[0];  /* The memory destination being stored to.  */
 681
 682   /* Don't apply the swap optimization if we've already performed register
 683      allocation and the hard register source is not in the altivec range.  */
 684   if ((MEM_ALIGN (mem) >= 128)  /* MEM_ALIGN is in bits: needs 16-byte alignment.  */
 685       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 686           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 687     {
 688       rtx mem_address = XEXP (mem, 0);
 689       enum machine_mode mode = GET_MODE (mem);
 690       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 691         {
 692           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 693           emit_insn (stvx_set_expr);
 694           DONE;  /* The emitted stvx set replaces the two-swap template.  */
 695         }
 696       else if (rs6000_quadword_masked_address_p (mem_address))
 697         {
 698           /* This rtl is already in the form that matches stvx instruction,
 699              so leave it alone.  */
 700           DONE;  /* Nothing is emitted on this path.  */
 701         }
 702       /* Otherwise, fall through to transform into a swapping store.  */
 703     }
 704
 705   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])  /* Temporary for the swapped value, */
 706                                        : operands[1];  /* or the source itself when no pseudo can be created.  */
 707 })
 708
 709 ;; The post-reload split requires that we re-permute the source
 710 ;; register in case it is still live.
 711 (define_split  ; Post-reload split of a VSX_D store (condition ends in reload_completed).
 712   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
 713         (match_operand:VSX_D 1 "vsx_register_operand"))]
 714   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 715   [(set (match_dup 1)  ; Swap the two elements of the source register in place.
 716         (vec_select:<MODE>
 717           (match_dup 1)
 718           (parallel [(const_int 1) (const_int 0)])))
 719    (set (match_dup 0)  ; Store it to memory with the elements swapped again.
 720         (vec_select:<MODE>
 721           (match_dup 1)
 722           (parallel [(const_int 1) (const_int 0)])))
 723    (set (match_dup 1)  ; Swap the source register back to its original contents.
 724         (vec_select:<MODE>
 725           (match_dup 1)
 726           (parallel [(const_int 1) (const_int 0)])))]
 727   "")
 728
 729 (define_insn "*vsx_le_perm_store_<mode>"  ; Little-endian store of a VSX_W (V4SF/V4SI) vector to memory.
 730   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
 731         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]  ; "+": the post-reload split below rewrites operand 1 in place.
 732   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 733   "#"  ; No assembly template: the define_splits that follow do the work.
 734   [(set_attr "type" "vecstore")
 735    (set_attr "length" "12")])
 736
 737 (define_split  ; Pre-reload split of a VSX_W store (condition ends in !reload_completed).
 738   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
 739         (match_operand:VSX_W 1 "vsx_register_operand"))]
 740   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 741   [(set (match_dup 2)  ; operands[2] = source register with its two 2-element halves swapped.
 742         (vec_select:<MODE>
 743           (match_dup 1)
 744           (parallel [(const_int 2) (const_int 3)
 745                      (const_int 0) (const_int 1)])))
 746    (set (match_dup 0)  ; memory = operands[2] with the halves swapped again.
 747         (vec_select:<MODE>
 748           (match_dup 2)
 749           (parallel [(const_int 2) (const_int 3)
 750                      (const_int 0) (const_int 1)])))]
 751 {
 752   rtx mem = operands[0];  /* The memory destination being stored to.  */
 753
 754   /* Don't apply the swap optimization if we've already performed register
 755      allocation and the hard register source is not in the altivec range.  */
 756   if ((MEM_ALIGN (mem) >= 128)  /* MEM_ALIGN is in bits: needs 16-byte alignment.  */
 757       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 758           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 759     {
 760       rtx mem_address = XEXP (mem, 0);
 761       enum machine_mode mode = GET_MODE (mem);
 762       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 763         {
 764           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 765           emit_insn (stvx_set_expr);
 766           DONE;  /* The emitted stvx set replaces the two-swap template.  */
 767         }
 768       else if (rs6000_quadword_masked_address_p (mem_address))
 769         {
 770           /* This rtl is already in the form that matches stvx instruction,
 771              so leave it alone.  */
 772           DONE;  /* Nothing is emitted on this path.  */
 773         }
 774       /* Otherwise, fall through to transform into a swapping store.  */
 775     }
 776
 777   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])  /* Temporary for the swapped value, */
 778                                        : operands[1];  /* or the source itself when no pseudo can be created.  */
 779 })
 780
 781 ;; The post-reload split requires that we re-permute the source
 782 ;; register in case it is still live.
 783 (define_split  ; Post-reload split of a VSX_W store (condition ends in reload_completed).
 784   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
 785         (match_operand:VSX_W 1 "vsx_register_operand"))]
 786   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 787   [(set (match_dup 1)  ; Swap the two 2-element halves of the source register in place.
 788         (vec_select:<MODE>
 789           (match_dup 1)
 790           (parallel [(const_int 2) (const_int 3)
 791                      (const_int 0) (const_int 1)])))
 792    (set (match_dup 0)  ; Store it to memory with the halves swapped again.
 793         (vec_select:<MODE>
 794           (match_dup 1)
 795           (parallel [(const_int 2) (const_int 3)
 796                      (const_int 0) (const_int 1)])))
 797    (set (match_dup 1)  ; Swap the source register back to its original contents.
 798         (vec_select:<MODE>
 799           (match_dup 1)
 800           (parallel [(const_int 2) (const_int 3)
 801                      (const_int 0) (const_int 1)])))]
 802   "")
 803
     ;; Store of a V8HI VSX register to indexed/indirect memory on little-endian
     ;; targets without TARGET_P9_VECTOR.  The output template is "#", so the insn
     ;; is never printed directly and relies on being split.  Operand 1 is marked
     ;; "+" (read/write); the post-reload split for this mode rewrites it in place.
 804 (define_insn "*vsx_le_perm_store_v8hi"
 805   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
 806         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 807   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 808   "#"
 809   [(set_attr "type" "vecstore")
 810    (set_attr "length" "12")])
 811
     ;; Pre-reload split of the little-endian V8HI store.  By default the store
     ;; becomes two half-swapping vec_selects: operand 1 -> operand 2, then
     ;; operand 2 -> memory.  When the memory is known to be 128-bit aligned and
     ;; the source is a pseudo or an Altivec register, the preparation code
     ;; instead emits the insn built by rs6000_gen_stvx (register or
     ;; register+register address), or emits nothing (address accepted by
     ;; rs6000_quadword_masked_address_p).
 812 (define_split
 813   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
 814         (match_operand:V8HI 1 "vsx_register_operand"))]
 815   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 816   [(set (match_dup 2)
 817         (vec_select:V8HI
 818           (match_dup 1)
 819           (parallel [(const_int 4) (const_int 5)
 820                      (const_int 6) (const_int 7)
 821                      (const_int 0) (const_int 1)
 822                      (const_int 2) (const_int 3)])))
 823    (set (match_dup 0)
 824         (vec_select:V8HI
 825           (match_dup 2)
 826           (parallel [(const_int 4) (const_int 5)
 827                      (const_int 6) (const_int 7)
 828                      (const_int 0) (const_int 1)
 829                      (const_int 2) (const_int 3)])))]
 830 {
 831   rtx mem = operands[0];
 832
 833   /* Don't apply the swap optimization if we've already performed register
 834      allocation and the hard register source is not in the altivec range.  */
 835   if ((MEM_ALIGN (mem) >= 128)
 836       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 837           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 838     {
 839       rtx mem_address = XEXP (mem, 0);
 840       enum machine_mode mode = GET_MODE (mem);
 841       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 842         {
 843           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 844           emit_insn (stvx_set_expr);
 845           DONE;
 846         }
 847       else if (rs6000_quadword_masked_address_p (mem_address))
 848         {
 849           /* This rtl is already in the form that matches stvx instruction,
 850              so leave it alone.  */
 851           DONE;
 852         }
 853       /* Otherwise, fall through to transform into a swapping store.  */
 854     }
 855
       /* Intermediate for the swapped value: a fresh pseudo when one can be
          created, otherwise the source register itself.  */
 856   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 857                                        : operands[1];
 858 })
 859
 860 ;; The post-reload split requires that we re-permute the source
 861 ;; register in case it is still live.
     ;; No scratch register is used: operand 1 has its two halves swapped in
     ;; place, is stored through a second swap, and is then swapped back.
 862 (define_split
 863   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
 864         (match_operand:V8HI 1 "vsx_register_operand"))]
 865   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 866   [(set (match_dup 1)
 867         (vec_select:V8HI
 868           (match_dup 1)
 869           (parallel [(const_int 4) (const_int 5)
 870                      (const_int 6) (const_int 7)
 871                      (const_int 0) (const_int 1)
 872                      (const_int 2) (const_int 3)])))
 873    (set (match_dup 0)
 874         (vec_select:V8HI
 875           (match_dup 1)
 876           (parallel [(const_int 4) (const_int 5)
 877                      (const_int 6) (const_int 7)
 878                      (const_int 0) (const_int 1)
 879                      (const_int 2) (const_int 3)])))
 880    (set (match_dup 1)
 881         (vec_select:V8HI
 882           (match_dup 1)
 883           (parallel [(const_int 4) (const_int 5)
 884                      (const_int 6) (const_int 7)
 885                      (const_int 0) (const_int 1)
 886                      (const_int 2) (const_int 3)])))]
 887   "")
 888
     ;; Store of a V16QI VSX register to indexed/indirect memory on little-endian
     ;; targets without TARGET_P9_VECTOR.  The output template is "#", so the insn
     ;; is never printed directly and relies on being split.  Operand 1 is marked
     ;; "+" (read/write); the post-reload split for this mode rewrites it in place.
 889 (define_insn "*vsx_le_perm_store_v16qi"
 890   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
 891         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 892   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 893   "#"
 894   [(set_attr "type" "vecstore")
 895    (set_attr "length" "12")])
 896
     ;; Pre-reload split of the little-endian V16QI store.  By default the store
     ;; becomes two half-swapping vec_selects: operand 1 -> operand 2, then
     ;; operand 2 -> memory.  When the memory is known to be 128-bit aligned and
     ;; the source is a pseudo or an Altivec register, the preparation code
     ;; instead emits the insn built by rs6000_gen_stvx (register or
     ;; register+register address), or emits nothing (address accepted by
     ;; rs6000_quadword_masked_address_p).
 897 (define_split
 898   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
 899         (match_operand:V16QI 1 "vsx_register_operand"))]
 900   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 901   [(set (match_dup 2)
 902         (vec_select:V16QI
 903           (match_dup 1)
 904           (parallel [(const_int 8) (const_int 9)
 905                      (const_int 10) (const_int 11)
 906                      (const_int 12) (const_int 13)
 907                      (const_int 14) (const_int 15)
 908                      (const_int 0) (const_int 1)
 909                      (const_int 2) (const_int 3)
 910                      (const_int 4) (const_int 5)
 911                      (const_int 6) (const_int 7)])))
 912    (set (match_dup 0)
 913         (vec_select:V16QI
 914           (match_dup 2)
 915           (parallel [(const_int 8) (const_int 9)
 916                      (const_int 10) (const_int 11)
 917                      (const_int 12) (const_int 13)
 918                      (const_int 14) (const_int 15)
 919                      (const_int 0) (const_int 1)
 920                      (const_int 2) (const_int 3)
 921                      (const_int 4) (const_int 5)
 922                      (const_int 6) (const_int 7)])))]
 923 {
 924   rtx mem = operands[0];
 925
 926   /* Don't apply the swap optimization if we've already performed register
 927      allocation and the hard register source is not in the altivec range.  */
 928   if ((MEM_ALIGN (mem) >= 128)
 929       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 930           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 931     {
 932       rtx mem_address = XEXP (mem, 0);
 933       enum machine_mode mode = GET_MODE (mem);
 934       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 935         {
 936           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 937           emit_insn (stvx_set_expr);
 938           DONE;
 939         }
 940       else if (rs6000_quadword_masked_address_p (mem_address))
 941         {
 942           /* This rtl is already in the form that matches stvx instruction,
 943              so leave it alone.  */
 944           DONE;
 945         }
 946       /* Otherwise, fall through to transform into a swapping store.  */
 947     }
 948
       /* Intermediate for the swapped value: a fresh pseudo when one can be
          created, otherwise the source register itself.  */
 949   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 950                                        : operands[1];
 951 })
 952
 953 ;; The post-reload split requires that we re-permute the source
 954 ;; register in case it is still live.
     ;; No scratch register is used: operand 1 has its two halves swapped in
     ;; place, is stored through a second swap, and is then swapped back.
 955 (define_split
 956   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
 957         (match_operand:V16QI 1 "vsx_register_operand"))]
 958   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 959   [(set (match_dup 1)
 960         (vec_select:V16QI
 961           (match_dup 1)
 962           (parallel [(const_int 8) (const_int 9)
 963                      (const_int 10) (const_int 11)
 964                      (const_int 12) (const_int 13)
 965                      (const_int 14) (const_int 15)
 966                      (const_int 0) (const_int 1)
 967                      (const_int 2) (const_int 3)
 968                      (const_int 4) (const_int 5)
 969                      (const_int 6) (const_int 7)])))
 970    (set (match_dup 0)
 971         (vec_select:V16QI
 972           (match_dup 1)
 973           (parallel [(const_int 8) (const_int 9)
 974                      (const_int 10) (const_int 11)
 975                      (const_int 12) (const_int 13)
 976                      (const_int 14) (const_int 15)
 977                      (const_int 0) (const_int 1)
 978                      (const_int 2) (const_int 3)
 979                      (const_int 4) (const_int 5)
 980                      (const_int 6) (const_int 7)])))
 981    (set (match_dup 1)
 982         (vec_select:V16QI
 983           (match_dup 1)
 984           (parallel [(const_int 8) (const_int 9)
 985                      (const_int 10) (const_int 11)
 986                      (const_int 12) (const_int 13)
 987                      (const_int 14) (const_int 15)
 988                      (const_int 0) (const_int 1)
 989                      (const_int 2) (const_int 3)
 990                      (const_int 4) (const_int 5)
 991                      (const_int 6) (const_int 7)])))]
 992   "")
 993
 994 ;; Little endian swapping of the two 64-bit halves of 128-bit types that are
 995 ;; either scalars or the special V1TI container class, for which it is not
 996 ;; appropriate to use vec_select.  The swap is expressed as a rotate by 64.
     ;; Alternatives, in order: VSX reg <- VSX reg, VSX reg <- memory,
     ;; memory <- VSX reg, GPRs <- GPRs, GPRs <- memory, memory <- GPRs.
     ;; The three GPR alternatives print two instructions each (length 8).
 997 (define_insn "*vsx_le_permute_<mode>"
 998   [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
 999         (rotate:VSX_TI
1000          (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
1001          (const_int 64)))]
1002   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1003   "@
1004    xxpermdi %x0,%x1,%x1,2
1005    lxvd2x %x0,%y1
1006    stxvd2x %x1,%y0
1007    mr %0,%L1\;mr %L0,%1
1008    ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
1009    std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
1010   [(set_attr "length" "4,4,4,8,8,8")
1011    (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
1012
     ;; Matches a rotate by 64 applied twice to a VSX register, i.e. a value that
     ;; ends up unchanged.  The split rewrites it as a plain register move; after
     ;; reload, when source and destination are the same register, only a
     ;; NOTE_INSN_DELETED is emitted.  Alternative 0 ties operand 1 to operand 0
     ;; and has length 0; alternative 1 copies with xxlor.
1013 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
1014   [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
1015         (rotate:VSX_TI
1016          (rotate:VSX_TI
1017           (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
1018           (const_int 64))
1019          (const_int 64)))]
1020   "!BYTES_BIG_ENDIAN && TARGET_VSX"
1021   "@
1022    #
1023    xxlor %x0,%x1"
1024   ""
1025   [(set (match_dup 0) (match_dup 1))]
1026 {
1027   if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
1028     {
1029       emit_note (NOTE_INSN_DELETED);
1030       DONE;
1031     }
1032 }
1033   [(set_attr "length" "0,4")
1034    (set_attr "type" "veclogical")])
1035
     ;; Little-endian load of a VSX_LE_128 value (KF, TF, TI, V1TI) when
     ;; TARGET_P9_VECTOR is not available, into either a VSX register or GPRs.
     ;; Both alternatives are "#".  The split emits two permutes through
     ;; rs6000_emit_le_vsx_permute: memory -> tmp, then tmp -> operand 0.
1036 (define_insn_and_split "*vsx_le_perm_load_<mode>"
1037   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
1038         (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1039   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1040   "@
1041    #
1042    #"
1043   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1044   [(const_int 0)]
1045 {
       /* Use a fresh pseudo for the intermediate value when one can be created;
          otherwise permute through the destination register itself.  */
1046   rtx tmp = (can_create_pseudo_p ()
1047              ? gen_reg_rtx_and_attrs (operands[0])
1048              : operands[0]);
1049   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1050   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1051   DONE;
1052 }
1053   [(set_attr "type" "vecload,load")
1054    (set_attr "length" "8,8")])
1055
     ;; Little-endian store of a VSX_LE_128 value (KF, TF, TI, V1TI) when
     ;; TARGET_P9_VECTOR is not available, from either a VSX register or GPRs.
     ;; Both alternatives are "#", so the insn relies on the define_splits for
     ;; this iterator.  The VSX alternative marks operand 1 "+" (read/write) and
     ;; has length 12; the GPR alternative has length 8.
1056 (define_insn "*vsx_le_perm_store_<mode>"
1057   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1058         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
1059   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1060   "@
1061    #
1062    #"
1063   [(set_attr "type" "vecstore,store")
1064    (set_attr "length" "12,8")])
1065
1066 (define_split
1067   [(set (match_operand:VSX_LE_128 0 "memory_operand")
1068         (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1069   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
1070   [(const_int 0)]
1071 {
1072   rtx tmp = (can_create_pseudo_p ()
1073              ? gen_reg_rtx_and_attrs (operands[0])
1074              : operands[0]);
1075   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1076   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1077   DONE;
1078 })
1079
1080 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1081 ;; GPR registers on a little endian system.
     ;; Load form: a rotate-by-64 load into GPR operand 0 followed by a
     ;; rotate-by-64 of operand 0 into GPR operand 2 becomes a plain load into
     ;; operand 2, provided operand 0 is operand 2 or is dead after the second insn.
1082 (define_peephole2
1083   [(set (match_operand:VSX_TI 0 "int_reg_operand")
1084         (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1085                        (const_int 64)))
1086    (set (match_operand:VSX_TI 2 "int_reg_operand")
1087         (rotate:VSX_TI (match_dup 0)
1088                        (const_int 64)))]
1089   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1090    && (rtx_equal_p (operands[0], operands[2])
1091        || peep2_reg_dead_p (2, operands[0]))"
1092    [(set (match_dup 2) (match_dup 1))])
1093
     ;; Store form of the GPR peephole: a rotate-by-64 of GPR operand 1 into GPR
     ;; operand 0 followed by a rotate-by-64 store of operand 0 to memory becomes
     ;; a plain store of operand 1, provided operand 0 is dead after the second
     ;; insn.
1094 (define_peephole2
1095   [(set (match_operand:VSX_TI 0 "int_reg_operand")
1096         (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1097                        (const_int 64)))
1098    (set (match_operand:VSX_TI 2 "memory_operand")
1099         (rotate:VSX_TI (match_dup 0)
1100                        (const_int 64)))]
1101   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1102    && peep2_reg_dead_p (2, operands[0])"
1103    [(set (match_dup 2) (match_dup 1))])
1104
1105 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1106 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
1107 ;; floating point are handled by the more generic swap elimination pass.
     ;; NOTE(review): operands 0, 1 and 2 are all matched with
     ;; vsx_register_operand, so as written this collapses two back-to-back
     ;; register rotates into a register copy -- confirm the intended operand
     ;; predicates against the description above.
1108 (define_peephole2
1109   [(set (match_operand:TI 0 "vsx_register_operand")
1110         (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1111                    (const_int 64)))
1112    (set (match_operand:TI 2 "vsx_register_operand")
1113         (rotate:TI (match_dup 0)
1114                    (const_int 64)))]
1115   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1116    && (rtx_equal_p (operands[0], operands[2])
1117        || peep2_reg_dead_p (2, operands[0]))"
1118    [(set (match_dup 2) (match_dup 1))])
1119
1120 ;; The post-reload split requires that we re-permute the source
1121 ;; register in case it is still live.
     ;; Three permutes are emitted: operand 1 in place, operand 1 -> memory, and
     ;; operand 1 in place again to restore its original contents.
1122 (define_split
1123   [(set (match_operand:VSX_LE_128 0 "memory_operand")
1124         (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1125   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1126   [(const_int 0)]
1127 {
1128   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1129   rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1130   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1131   DONE;
1132 })
1133
1134 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
1135 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
     ;; This pattern handles the vec_duplicate form for V16QI.  The signed 8-bit
     ;; immediate is masked to its low 8 bits before being printed.
1136 (define_insn "xxspltib_v16qi"
1137   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1138         (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1139   "TARGET_P9_VECTOR"
1140 {
1141   operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1142   return "xxspltib %x0,%2";
1143 }
1144   [(set_attr "type" "vecperm")])
1145
     ;; Constant vectors (VSINT_842 modes) that xxspltib_constant_p reports as
     ;; needing exactly one instruction.  The splat value it returns is masked to
     ;; 8 bits and printed as the XXSPLTIB immediate; any other answer from
     ;; xxspltib_constant_p is treated as unreachable.
1146 (define_insn "xxspltib_<mode>_nosplit"
1147   [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1148         (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1149   "TARGET_P9_VECTOR"
1150 {
1151   rtx op1 = operands[1];
1152   int value = 256;
1153   int num_insns = -1;
1154
1155   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1156       || num_insns != 1)
1157     gcc_unreachable ();
1158
1159   operands[2] = GEN_INT (value & 0xff);
1160   return "xxspltib %x0,%2";
1161 }
1162   [(set_attr "type" "vecperm")])
1163
     ;; Constant vectors (VSINT_842 modes) that xxspltib_constant_p reports as
     ;; needing two instructions.  The split emits XXSPLTIB into a V16QI
     ;; temporary and then widens it into operand 0: a QI sign extension for
     ;; V2DI and V4SI, or vupkhsb for V8HI.
1164 (define_insn_and_split "*xxspltib_<mode>_split"
1165   [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1166         (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1167   "TARGET_P9_VECTOR"
1168   "#"
1169   "&& 1"
1170   [(const_int 0)]
1171 {
1172   int value = 256;
1173   int num_insns = -1;
1174   rtx op0 = operands[0];
1175   rtx op1 = operands[1];
       /* V16QI temporary for the splat: a fresh pseudo when one can be created,
          otherwise the destination register viewed as V16QI.  */
1176   rtx tmp = ((can_create_pseudo_p ())
1177              ? gen_reg_rtx (V16QImode)
1178              : gen_lowpart (V16QImode, op0));
1179
1180   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1181       || num_insns != 2)
1182     gcc_unreachable ();
1183
1184   emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1185
1186   if (<MODE>mode == V2DImode)
1187     emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1188
1189   else if (<MODE>mode == V4SImode)
1190     emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1191
1192   else if (<MODE>mode == V8HImode)
1193     emit_insn (gen_altivec_vupkhsb  (op0, tmp));
1194
1195   else
1196     gcc_unreachable ();
1197
1198   DONE;
1199 }
1200   [(set_attr "type" "vecperm")
1201    (set_attr "length" "8")])
1202
1203
1204 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
1205 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1206 ;; all 1's, since the machine does not have to wait for the previous
1207 ;; instruction using the register being set (such as a store waiting on a slow
1208 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1209
     ;; 128-bit VSX move for TARGET_POWERPC64.  At least one operand must be a
     ;; register.  The assembly is produced by rs6000_output_move_128bit.  The
     ;; table below names the 18 alternatives in constraint order.
1210 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
1211 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
1212 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
1213 (define_insn "*vsx_mov<mode>_64bit"
1214   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1215                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
1216                 ?&r,       ??r,       ??Y,       <??r>,     wo,        v,
1217                 ?<VSa>,    *r,        v,         ??r,       wZ,        v")
1218
1219         (match_operand:VSX_M 1 "input_operand"
1220                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
1221                 wQ,        Y,         r,         r,         wE,        jwM,
1222                 ?jwM,      jwM,       W,         W,         v,         wZ"))]
1223
1224   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1225    && (register_operand (operands[0], <MODE>mode)
1226        || register_operand (operands[1], <MODE>mode))"
1227 {
1228   return rs6000_output_move_128bit (operands);
1229 }
1230   [(set_attr "type"
1231                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
1232                 store,     load,      store,     *,         vecsimple, vecsimple,
1233                 vecsimple, *,         *,         *,         vecstore,  vecload")
1234
1235    (set_attr "length"
1236                "4,         4,         4,         8,         4,         8,
1237                 8,         8,         8,         8,         4,         4,
1238                 4,         8,         20,        20,        4,         4")])
1239
     ;; 128-bit VSX move for !TARGET_POWERPC64.  At least one operand must be a
     ;; register.  The assembly is produced by rs6000_output_move_128bit.  The
     ;; table below names the 14 alternatives in constraint order.
1240 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
1241 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
1242 ;;              LVX (VMX)  STVX (VMX)
1243 (define_insn "*vsx_mov<mode>_32bit"
1244   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1245                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
1246                 wo,        v,         ?<VSa>,    *r,        v,         ??r,
1247                 wZ,        v")
1248
1249         (match_operand:VSX_M 1 "input_operand"
1250                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
1251                 wE,        jwM,       ?jwM,      jwM,       W,         W,
1252                 v,         wZ"))]
1253
1254   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1255    && (register_operand (operands[0], <MODE>mode)
1256        || register_operand (operands[1], <MODE>mode))"
1257 {
1258   return rs6000_output_move_128bit (operands);
1259 }
1260   [(set_attr "type"
1261                "vecstore,  vecload,   vecsimple, load,      store,    *,
1262                 vecsimple, vecsimple, vecsimple, *,         *,        *,
1263                 vecstore,  vecload")
1264
1265    (set_attr "length"
1266                "4,         4,         4,         16,        16,        16,
1267                 4,         4,         4,         16,        20,        32,
1268                 4,         4")])
1269
1270 ;; Explicit load/store expanders for the builtin functions.
     ;; Load expander: on little-endian targets without TARGET_P9_VECTOR the move
     ;; is emitted through rs6000_emit_le_vsx_move; otherwise the plain set in
     ;; the pattern is used.
1271 (define_expand "vsx_load_<mode>"
1272   [(set (match_operand:VSX_M 0 "vsx_register_operand")
1273         (match_operand:VSX_M 1 "memory_operand"))]
1274   "VECTOR_MEM_VSX_P (<MODE>mode)"
1275 {
1276   /* Expand to swaps if needed, prior to swap optimization.  */
1277   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1278     {
1279       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1280       DONE;
1281     }
1282 })
1283
     ;; Store expander for the builtin functions: on little-endian targets
     ;; without TARGET_P9_VECTOR the move is emitted through
     ;; rs6000_emit_le_vsx_move; otherwise the plain set in the pattern is used.
1284 (define_expand "vsx_store_<mode>"
1285   [(set (match_operand:VSX_M 0 "memory_operand")
1286         (match_operand:VSX_M 1 "vsx_register_operand"))]
1287   "VECTOR_MEM_VSX_P (<MODE>mode)"
1288 {
1289   /* Expand to swaps if needed, prior to swap optimization.  */
1290   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1291     {
1292       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1293       DONE;
1294     }
1295 })
1296
1297 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1298 ;; when you really want their element-reversing behavior.
     ;; V2DI: little-endian only; the two doublewords are loaded in reversed
     ;; element order with a single lxvd2x.
1299 (define_insn "vsx_ld_elemrev_v2di"
1300   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1301         (vec_select:V2DI
1302           (match_operand:V2DI 1 "memory_operand" "Z")
1303           (parallel [(const_int 1) (const_int 0)])))]
1304   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1305   "lxvd2x %x0,%y1"
1306   [(set_attr "type" "vecload")])
1307
1308 (define_insn "vsx_ld_elemrev_v1ti"
1309   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1310         (vec_select:V1TI
1311           (match_operand:V1TI 1 "memory_operand" "Z")
1312           (parallel [(const_int 0)])))]
1313   "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1314 {
1315    return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1316 }
1317   [(set_attr "type" "vecload")])
1318
     ;; Element-reversing load for V2DF on little-endian targets: the two
     ;; doublewords are loaded in reversed element order with a single lxvd2x.
1319 (define_insn "vsx_ld_elemrev_v2df"
1320   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1321         (vec_select:V2DF
1322           (match_operand:V2DF 1 "memory_operand" "Z")
1323           (parallel [(const_int 1) (const_int 0)])))]
1324   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1325   "lxvd2x %x0,%y1"
1326   [(set_attr "type" "vecload")])
1327
     ;; Element-reversing load for V4SI on little-endian targets: the four words
     ;; are loaded in reversed element order with a single lxvw4x.
1328 (define_insn "vsx_ld_elemrev_v4si"
1329   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1330         (vec_select:V4SI
1331           (match_operand:V4SI 1 "memory_operand" "Z")
1332           (parallel [(const_int 3) (const_int 2)
1333                      (const_int 1) (const_int 0)])))]
1334   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1335   "lxvw4x %x0,%y1"
1336   [(set_attr "type" "vecload")])
1337
     ;; Element-reversing load for V4SF on little-endian targets: the four words
     ;; are loaded in reversed element order with a single lxvw4x.
1338 (define_insn "vsx_ld_elemrev_v4sf"
1339   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1340         (vec_select:V4SF
1341           (match_operand:V4SF 1 "memory_operand" "Z")
1342           (parallel [(const_int 3) (const_int 2)
1343                      (const_int 1) (const_int 0)])))]
1344   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1345   "lxvw4x %x0,%y1"
1346   [(set_attr "type" "vecload")])
1347
     ;; Element-reversing load for V8HI on little-endian targets.  With
     ;; TARGET_P9_VECTOR the pattern is emitted unchanged (the lxvh8x insn
     ;; pattern has the same RTL).  Otherwise the memory is loaded as V4SI with
     ;; vsx_ld_elemrev_v4si and the result is rearranged with a vperm using a
     ;; constant byte-permute control vector.
1348 (define_expand "vsx_ld_elemrev_v8hi"
1349   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1350         (vec_select:V8HI
1351           (match_operand:V8HI 1 "memory_operand" "Z")
1352           (parallel [(const_int 7) (const_int 6)
1353                      (const_int 5) (const_int 4)
1354                      (const_int 3) (const_int 2)
1355                      (const_int 1) (const_int 0)])))]
1356   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1357 {
1358   if (!TARGET_P9_VECTOR)
1359     {
1360       rtx tmp = gen_reg_rtx (V4SImode);
1361       rtx subreg, subreg2, perm[16], pcv;
1362       /* 2 is leftmost element in register */
1363       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1364       int i;
1365
           /* View the V8HI memory as V4SI and do the word-reversing load.  */
1366       subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1367       emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1368       subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1369
           /* Build the V16QI permute control vector from REORDER.  */
1370       for (i = 0; i < 16; ++i)
1371         perm[i] = GEN_INT (reorder[i]);
1372
1373       pcv = force_reg (V16QImode,
1374                        gen_rtx_CONST_VECTOR (V16QImode,
1375                                              gen_rtvec_v (16, perm)));
1376       emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1377                                                 subreg2, pcv));
1378       DONE;
1379     }
1380 })
1381
     ;; Element-reversing V8HI load when TARGET_P9_VECTOR is available: a single
     ;; lxvh8x on little-endian targets.
1382 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1383   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1384         (vec_select:V8HI
1385           (match_operand:V8HI 1 "memory_operand" "Z")
1386           (parallel [(const_int 7) (const_int 6)
1387                      (const_int 5) (const_int 4)
1388                      (const_int 3) (const_int 2)
1389                      (const_int 1) (const_int 0)])))]
1390   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1391   "lxvh8x %x0,%y1"
1392   [(set_attr "type" "vecload")])
1393
     ;; Element-reversing load for V16QI on little-endian targets.  With
     ;; TARGET_P9_VECTOR the pattern is emitted unchanged (the lxvb16x insn
     ;; pattern has the same RTL).  Otherwise the memory is loaded as V4SI with
     ;; vsx_ld_elemrev_v4si and the result is rearranged with a vperm using a
     ;; constant byte-permute control vector.
1394 (define_expand "vsx_ld_elemrev_v16qi"
1395   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1396         (vec_select:V16QI
1397           (match_operand:V16QI 1 "memory_operand" "Z")
1398           (parallel [(const_int 15) (const_int 14)
1399                      (const_int 13) (const_int 12)
1400                      (const_int 11) (const_int 10)
1401                      (const_int  9) (const_int  8)
1402                      (const_int  7) (const_int  6)
1403                      (const_int  5) (const_int  4)
1404                      (const_int  3) (const_int  2)
1405                      (const_int  1) (const_int  0)])))]
1406   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1407 {
1408   if (!TARGET_P9_VECTOR)
1409     {
1410       rtx tmp = gen_reg_rtx (V4SImode);
1411       rtx subreg, subreg2, perm[16], pcv;
1412       /* 3 is leftmost element in register */
1413       unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1414       int i;
1415
           /* View the V16QI memory as V4SI and do the word-reversing load.  */
1416       subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1417       emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1418       subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1419
           /* Build the V16QI permute control vector from REORDER.  */
1420       for (i = 0; i < 16; ++i)
1421         perm[i] = GEN_INT (reorder[i]);
1422
1423       pcv = force_reg (V16QImode,
1424                        gen_rtx_CONST_VECTOR (V16QImode,
1425                                              gen_rtvec_v (16, perm)));
1426       emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1427                                                  subreg2, pcv));
1428       DONE;
1429     }
1430 })
1431
     ;; Element-reversing V16QI load when TARGET_P9_VECTOR is available: a single
     ;; lxvb16x on little-endian targets.
1432 (define_insn "*vsx_ld_elemrev_v16qi_internal"
1433   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1434         (vec_select:V16QI
1435           (match_operand:V16QI 1 "memory_operand" "Z")
1436           (parallel [(const_int 15) (const_int 14)
1437                      (const_int 13) (const_int 12)
1438                      (const_int 11) (const_int 10)
1439                      (const_int  9) (const_int  8)
1440                      (const_int  7) (const_int  6)
1441                      (const_int  5) (const_int  4)
1442                      (const_int  3) (const_int  2)
1443                      (const_int  1) (const_int  0)])))]
1444   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1445   "lxvb16x %x0,%y1"
1446   [(set_attr "type" "vecload")])
1447
1448 (define_insn "vsx_st_elemrev_v1ti"
1449   [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1450         (vec_select:V1TI
1451           (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1452           (parallel [(const_int 0)])))
1453    (clobber (match_dup 1))]
1454   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1455 {
1456   return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1457 }
1458   [(set_attr "type" "vecstore")])
1459
     ;; Element-reversing store for V2DF on little-endian targets: the two
     ;; doublewords are stored in reversed element order with a single stxvd2x.
1460 (define_insn "vsx_st_elemrev_v2df"
1461   [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1462         (vec_select:V2DF
1463           (match_operand:V2DF 1 "vsx_register_operand" "wa")
1464           (parallel [(const_int 1) (const_int 0)])))]
1465   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1466   "stxvd2x %x1,%y0"
1467   [(set_attr "type" "vecstore")])
1468
     ;; Element-reversing store for V2DI on little-endian targets: the two
     ;; doublewords are stored in reversed element order with a single stxvd2x.
1469 (define_insn "vsx_st_elemrev_v2di"
1470   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1471         (vec_select:V2DI
1472           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1473           (parallel [(const_int 1) (const_int 0)])))]
1474   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1475   "stxvd2x %x1,%y0"
1476   [(set_attr "type" "vecstore")])
1477
     ;; Element-reversing store for V4SF on little-endian targets: the four words
     ;; are stored in reversed element order with a single stxvw4x.
1478 (define_insn "vsx_st_elemrev_v4sf"
1479   [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1480         (vec_select:V4SF
1481           (match_operand:V4SF 1 "vsx_register_operand" "wa")
1482           (parallel [(const_int 3) (const_int 2)
1483                      (const_int 1) (const_int 0)])))]
1484   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1485   "stxvw4x %x1,%y0"
1486   [(set_attr "type" "vecstore")])
1487
     ;; Element-reversing store for V4SI on little-endian targets: the four words
     ;; are stored in reversed element order with a single stxvw4x.
1488 (define_insn "vsx_st_elemrev_v4si"
1489   [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1490         (vec_select:V4SI
1491           (match_operand:V4SI 1 "vsx_register_operand" "wa")
1492           (parallel [(const_int 3) (const_int 2)
1493                      (const_int 1) (const_int 0)])))]
1494   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1495   "stxvw4x %x1,%y0"
1496   [(set_attr "type" "vecstore")])
1497
     ;; Element-reversing store for V8HI on little-endian targets.  With
     ;; TARGET_P9_VECTOR the pattern is emitted unchanged (the stxvh8x insn
     ;; pattern has the same RTL).  Otherwise the source is first rearranged
     ;; into a temporary with a vperm using a constant byte-permute control
     ;; vector, and the temporary is then stored as V4SI with
     ;; vsx_st_elemrev_v4si.
1498 (define_expand "vsx_st_elemrev_v8hi"
1499   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1500         (vec_select:V8HI
1501           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1502           (parallel [(const_int 7) (const_int 6)
1503                      (const_int 5) (const_int 4)
1504                      (const_int 3) (const_int 2)
1505                      (const_int 1) (const_int 0)])))]
1506   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1507 {
1508   if (!TARGET_P9_VECTOR)
1509     {
1510       rtx mem_subreg, subreg, perm[16], pcv;
1511       rtx tmp = gen_reg_rtx (V8HImode);
1512       /* 2 is leftmost element in register */
1513       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1514       int i;
1515
           /* Build the V16QI permute control vector from REORDER.  */
1516       for (i = 0; i < 16; ++i)
1517         perm[i] = GEN_INT (reorder[i]);
1518
1519       pcv = force_reg (V16QImode,
1520                        gen_rtx_CONST_VECTOR (V16QImode,
1521                                              gen_rtvec_v (16, perm)));
1522       emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1523                                                 operands[1], pcv));
           /* View both the permuted temporary and the memory as V4SI and do
              the word-reversing store.  */
1524       subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1525       mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1526       emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1527       DONE;
1528     }
1529 })
1530
     ;; Element-reversing V2DI store restricted to TARGET_P9_VECTOR; prints a
     ;; single stxvd2x.
     ;; NOTE(review): the RTL and output are the same as vsx_st_elemrev_v2di,
     ;; whose condition is a superset of this one, so this pattern looks
     ;; redundant -- confirm before relying on it.
1531 (define_insn "*vsx_st_elemrev_v2di_internal"
1532   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1533         (vec_select:V2DI
1534           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1535           (parallel [(const_int 1) (const_int 0)])))]
1536   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1537   "stxvd2x %x1,%y0"
1538   [(set_attr "type" "vecstore")])
1539
;; Element-reversed V8HI store (elements 7..0) as a single instruction.
;; Requires TARGET_P9_VECTOR in addition to little-endian VSX memory
;; support; emitted as stxvh8x.  This is the same RTL shape that the
;; vsx_st_elemrev_v8hi expander generates when TARGET_P9_VECTOR is set.
1540 (define_insn "*vsx_st_elemrev_v8hi_internal"
1541   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1542         (vec_select:V8HI
1543           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1544           (parallel [(const_int 7) (const_int 6)
1545                      (const_int 5) (const_int 4)
1546                      (const_int 3) (const_int 2)
1547                      (const_int 1) (const_int 0)])))]
1548   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1549   "stxvh8x %x1,%y0"
1550   [(set_attr "type" "vecstore")])
1551
;; Expander for an element-reversed V16QI store (elements 15..0 of
;; operand 1 written to memory operand 0) on little-endian VSX targets.
;; When TARGET_P9_VECTOR is set the preparation code emits nothing and the
;; RTL template below is generated as-is; otherwise the store is
;; synthesized from a vperm of operand 1 followed by vsx_st_elemrev_v4si.
1552 (define_expand "vsx_st_elemrev_v16qi"
1553   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1554         (vec_select:V16QI
1555           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1556           (parallel [(const_int 15) (const_int 14)
1557                      (const_int 13) (const_int 12)
1558                      (const_int 11) (const_int 10)
1559                      (const_int  9) (const_int  8)
1560                      (const_int  7) (const_int  6)
1561                      (const_int  5) (const_int  4)
1562                      (const_int  3) (const_int  2)
1563                      (const_int  1) (const_int  0)])))]
1564   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1565 {
1566   if (!TARGET_P9_VECTOR)
1567     {
1568       rtx mem_subreg, subreg, perm[16], pcv;
1569       rtx tmp = gen_reg_rtx (V16QImode);
1570       /* 3 is leftmost element in register */
1571       unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1572       int i;
1573
           /* Wrap each selector byte as a CONST_INT so the 16 values can be
              assembled into a V16QI CONST_VECTOR.  */
1574       for (i = 0; i < 16; ++i)
1575         perm[i] = GEN_INT (reorder[i]);
1576
           /* Load the permute control vector into a register.  */
1577       pcv = force_reg (V16QImode,
1578                        gen_rtx_CONST_VECTOR (V16QImode,
1579                                              gen_rtvec_v (16, perm)));
           /* Permute operand 1 with itself into TMP using that control
              vector.  */
1580       emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1581                                                  operands[1], pcv));
           /* View both TMP and the destination memory as V4SI and finish
              with the V4SI element-reversed store.  */
1582       subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1583       mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1584       emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1585       DONE;
1586     }
1587 })
1588
;; Element-reversed V16QI store (elements 15..0) as a single instruction.
;; Requires TARGET_P9_VECTOR in addition to little-endian VSX memory
;; support; emitted as stxvb16x.  This is the same RTL shape that the
;; vsx_st_elemrev_v16qi expander generates when TARGET_P9_VECTOR is set.
1589 (define_insn "*vsx_st_elemrev_v16qi_internal"
1590   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1591         (vec_select:V16QI
1592           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1593           (parallel [(const_int 15) (const_int 14)
1594                      (const_int 13) (const_int 12)
1595                      (const_int 11) (const_int 10)
1596                      (const_int  9) (const_int  8)
1597                      (const_int  7) (const_int  6)
1598                      (const_int  5) (const_int  4)
1599                      (const_int  3) (const_int  2)
1600                      (const_int  1) (const_int  0)])))]
1601   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1602   "stxvb16x %x1,%y0"
1603   [(set_attr "type" "vecstore")])
1604
1605 \f
1606 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1607 ;; instructions are now combined with the insn for the traditional floating
1608 ;; point unit.
;; Vector floating-point add over the VSX_F modes: operand 0 = operand 1 +
;; operand 2, all in VSX registers.  Emitted as xvadd<VSs>, with the
;; mnemonic suffix supplied by the <VSs> mode attribute.
1609 (define_insn "*vsx_add<mode>3"
1610   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1611         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1612                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1613   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1614   "xvadd<VSs> %x0,%x1,%x2"
1615   [(set_attr "type" "<VStype_simple>")
1616    (set_attr "fp_type" "<VSfptype_simple>")])
1617
;; Vector floating-point subtract over the VSX_F modes: operand 0 =
;; operand 1 - operand 2.  Emitted as xvsub<VSs>.
1618 (define_insn "*vsx_sub<mode>3"
1619   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1620         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1621                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1622   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1623   "xvsub<VSs> %x0,%x1,%x2"
1624   [(set_attr "type" "<VStype_simple>")
1625    (set_attr "fp_type" "<VSfptype_simple>")])
1626
;; Vector floating-point multiply over the VSX_F modes: operand 0 =
;; operand 1 * operand 2.  Emitted as xvmul<VSs>; note the fp_type
;; attribute uses <VSfptype_mul> rather than the "simple" variant.
1627 (define_insn "*vsx_mul<mode>3"
1628   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1629         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1630                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1631   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1632   "xvmul<VSs> %x0,%x1,%x2"
1633   [(set_attr "type" "<VStype_simple>")
1634    (set_attr "fp_type" "<VSfptype_mul>")])
1635
1636 ; Emulate vector with scalar for vec_mul in V2DImode
;; V2DI multiply represented as UNSPEC_VSX_MULSD.  The insn itself has no
;; assembler form ("#"); before reload it is split into two scalar DImode
;; multiplies, one per vector element, whose results are concatenated back
;; into operand 0.
1637 (define_insn_and_split "vsx_mul_v2di"
1638   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1639         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1640                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1641                      UNSPEC_VSX_MULSD))]
1642   "VECTOR_MEM_VSX_P (V2DImode)"
1643   "#"
1644   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1645   [(const_int 0)]
1646 {
1647   rtx op0 = operands[0];
1648   rtx op1 = operands[1];
1649   rtx op2 = operands[2];
1650   rtx op3 = gen_reg_rtx (DImode);
1651   rtx op4 = gen_reg_rtx (DImode);
1652   rtx op5 = gen_reg_rtx (DImode);
       /* Element 0: extract both inputs and multiply into op5.  */
1653   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1654   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1655   if (TARGET_POWERPC64)
1656     emit_insn (gen_muldi3 (op5, op3, op4));
1657   else
1658     {
           /* Without TARGET_POWERPC64, let expand_mult produce the DImode
              product and copy it into op5.  */
1659       rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1660       emit_move_insn (op5, ret);
1661     }
       /* Element 1: reuse op3/op4 as scratch; the product lands in op3.  */
1662   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1663   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1664   if (TARGET_POWERPC64)
1665     emit_insn (gen_muldi3 (op3, op3, op4));
1666   else
1667     {
1668       rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1669       emit_move_insn (op3, ret);
1670     }
       /* Reassemble the vector from the element-0 and element-1 products.  */
1671   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1672   DONE;
1673 }
1674   [(set_attr "type" "mul")])
1675
;; Vector floating-point divide over the VSX_F modes: operand 0 =
;; operand 1 / operand 2.  Emitted as xvdiv<VSs>, with the dedicated
;; <VStype_div>/<VSfptype_div> scheduling attributes.
1676 (define_insn "*vsx_div<mode>3"
1677   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1678         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1679                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1680   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1681   "xvdiv<VSs> %x0,%x1,%x2"
1682   [(set_attr "type" "<VStype_div>")
1683    (set_attr "fp_type" "<VSfptype_div>")])
1684
1685 ; Emulate vector with scalar for vec_div in V2DImode
;; Signed V2DI divide represented as UNSPEC_VSX_DIVSD.  The insn itself has
;; no assembler form ("#"); before reload it is split into two scalar
;; DImode signed divides, one per vector element, whose results are
;; concatenated back into operand 0.
1686 (define_insn_and_split "vsx_div_v2di"
1687   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1688         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1689                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1690                      UNSPEC_VSX_DIVSD))]
1691   "VECTOR_MEM_VSX_P (V2DImode)"
1692   "#"
1693   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1694   [(const_int 0)]
1695 {
1696   rtx op0 = operands[0];
1697   rtx op1 = operands[1];
1698   rtx op2 = operands[2];
1699   rtx op3 = gen_reg_rtx (DImode);
1700   rtx op4 = gen_reg_rtx (DImode);
1701   rtx op5 = gen_reg_rtx (DImode);
       /* Element 0: extract both inputs and divide into op5.  */
1702   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1703   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1704   if (TARGET_POWERPC64)
1705     emit_insn (gen_divdi3 (op5, op3, op4));
1706   else
1707     {
           /* Without TARGET_POWERPC64, call the DImode library routine
              registered for sdiv_optab and copy its result into op5.  */
1708       rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1709       rtx target = emit_library_call_value (libfunc,
1710                                             op5, LCT_NORMAL, DImode,
1711                                             op3, DImode,
1712                                             op4, DImode);
1713       emit_move_insn (op5, target);
1714     }
       /* Element 1: reuse op3/op4 as scratch; the quotient lands in op3.  */
1715   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1716   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1717   if (TARGET_POWERPC64)
1718     emit_insn (gen_divdi3 (op3, op3, op4));
1719   else
1720     {
1721       rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1722       rtx target = emit_library_call_value (libfunc,
1723                                             op3, LCT_NORMAL, DImode,
1724                                             op3, DImode,
1725                                             op4, DImode);
1726       emit_move_insn (op3, target);
1727     }
       /* Reassemble the vector from the element-0 and element-1 quotients.  */
1728   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1729   DONE;
1730 }
1731   [(set_attr "type" "div")])
1732
;; Unsigned V2DI divide represented as UNSPEC_VSX_DIVUD.  Same structure as
;; the signed variant: no assembler form ("#"), split before reload into
;; two scalar DImode unsigned divides (udivdi3, or the udiv_optab library
;; routine when TARGET_POWERPC64 is not set), then concatenated.
1733 (define_insn_and_split "vsx_udiv_v2di"
1734   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1735         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1736                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1737                      UNSPEC_VSX_DIVUD))]
1738   "VECTOR_MEM_VSX_P (V2DImode)"
1739   "#"
1740   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1741   [(const_int 0)]
1742 {
1743   rtx op0 = operands[0];
1744   rtx op1 = operands[1];
1745   rtx op2 = operands[2];
1746   rtx op3 = gen_reg_rtx (DImode);
1747   rtx op4 = gen_reg_rtx (DImode);
1748   rtx op5 = gen_reg_rtx (DImode);
       /* Element 0: extract both inputs and divide into op5.  */
1749   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1750   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1751   if (TARGET_POWERPC64)
1752     emit_insn (gen_udivdi3 (op5, op3, op4));
1753   else
1754     {
           /* Without TARGET_POWERPC64, call the DImode library routine
              registered for udiv_optab and copy its result into op5.  */
1755       rtx libfunc = optab_libfunc (udiv_optab, DImode);
1756       rtx target = emit_library_call_value (libfunc,
1757                                             op5, LCT_NORMAL, DImode,
1758                                             op3, DImode,
1759                                             op4, DImode);
1760       emit_move_insn (op5, target);
1761     }
       /* Element 1: reuse op3/op4 as scratch; the quotient lands in op3.  */
1762   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1763   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1764   if (TARGET_POWERPC64)
1765     emit_insn (gen_udivdi3 (op3, op3, op4));
1766   else
1767     {
1768       rtx libfunc = optab_libfunc (udiv_optab, DImode);
1769       rtx target = emit_library_call_value (libfunc,
1770                                             op3, LCT_NORMAL, DImode,
1771                                             op3, DImode,
1772                                             op4, DImode);
1773       emit_move_insn (op3, target);
1774     }
       /* Reassemble the vector from the element-0 and element-1 quotients.  */
1775   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1776   DONE;
1777 }
1778   [(set_attr "type" "div")])
1779
1780 ;; *tdiv* instruction returning the FG flag
;; Two-step expansion over the VSX_B modes: first set a fresh CCFP pseudo
;; (operand 3, created in the preparation code) from UNSPEC_VSX_TDIV of
;; operands 1 and 2, then set SI operand 0 to the GT test of that CCFP
;; value against zero.
1781 (define_expand "vsx_tdiv<mode>3_fg"
1782   [(set (match_dup 3)
1783         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1784                       (match_operand:VSX_B 2 "vsx_register_operand")]
1785                      UNSPEC_VSX_TDIV))
1786    (set (match_operand:SI 0 "gpc_reg_operand")
1787         (gt:SI (match_dup 3)
1788                (const_int 0)))]
1789   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1790 {
       /* Allocate the condition-register pseudo referenced by match_dup 3.  */
1791   operands[3] = gen_reg_rtx (CCFPmode);
1792 })
1793
1794 ;; *tdiv* instruction returning the FE flag
;; Two-step expansion over the VSX_B modes: first set a fresh CCFP pseudo
;; (operand 3, created in the preparation code) from UNSPEC_VSX_TDIV of
;; operands 1 and 2, then set SI operand 0 to the EQ test of that CCFP
;; value against zero.
1795 (define_expand "vsx_tdiv<mode>3_fe"
1796   [(set (match_dup 3)
1797         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1798                       (match_operand:VSX_B 2 "vsx_register_operand")]
1799                      UNSPEC_VSX_TDIV))
1800    (set (match_operand:SI 0 "gpc_reg_operand")
1801         (eq:SI (match_dup 3)
1802                (const_int 0)))]
1803   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1804 {
       /* Allocate the condition-register pseudo referenced by match_dup 3.  */
1805   operands[3] = gen_reg_rtx (CCFPmode);
1806 })
1807
;; The instruction behind the tdiv expanders: sets a CCFP condition
;; register (operand 0) from UNSPEC_VSX_TDIV of two VSX_B operands.
;; Emitted as x<VSv>tdiv<VSs>, with the <VSv> and <VSs> mode attributes
;; filling in the mnemonic for each mode.
1808 (define_insn "*vsx_tdiv<mode>3_internal"
1809   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1810         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1811                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1812                    UNSPEC_VSX_TDIV))]
1813   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1814   "x<VSv>tdiv<VSs> %0,%x1,%x2"
1815   [(set_attr "type" "<VStype_simple>")
1816    (set_attr "fp_type" "<VSfptype_simple>")])
1817
;; UNSPEC_FRES applied to a single VSX_F operand; emitted as xvre<VSs>
;; (the VSX vector reciprocal-estimate instruction).
1818 (define_insn "vsx_fre<mode>2"
1819   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1820         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1821                       UNSPEC_FRES))]
1822   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1823   "xvre<VSs> %x0,%x1"
1824   [(set_attr "type" "<VStype_simple>")
1825    (set_attr "fp_type" "<VSfptype_simple>")])
1826
;; Vector floating-point negate over the VSX_F modes; emitted as
;; xvneg<VSs>.
1827 (define_insn "*vsx_neg<mode>2"
1828   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1829         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1830   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1831   "xvneg<VSs> %x0,%x1"
1832   [(set_attr "type" "<VStype_simple>")
1833    (set_attr "fp_type" "<VSfptype_simple>")])
1834
;; Vector floating-point absolute value over the VSX_F modes; emitted as
;; xvabs<VSs>.
1835 (define_insn "*vsx_abs<mode>2"
1836   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1837         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1838   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1839   "xvabs<VSs> %x0,%x1"
1840   [(set_attr "type" "<VStype_simple>")
1841    (set_attr "fp_type" "<VSfptype_simple>")])
1842
;; Negated absolute value, matched as (neg (abs x)) over the VSX_F modes
;; and emitted as the single instruction xvnabs<VSs>.
1843 (define_insn "vsx_nabs<mode>2"
1844   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1845         (neg:VSX_F
1846          (abs:VSX_F
1847           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1848   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1849   "xvnabs<VSs> %x0,%x1"
1850   [(set_attr "type" "<VStype_simple>")
1851    (set_attr "fp_type" "<VSfptype_simple>")])
1852
;; Vector floating-point maximum (smax) over the VSX_F modes; emitted as
;; xvmax<VSs>.
1853 (define_insn "vsx_smax<mode>3"
1854   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1855         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1856                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1857   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1858   "xvmax<VSs> %x0,%x1,%x2"
1859   [(set_attr "type" "<VStype_simple>")
1860    (set_attr "fp_type" "<VSfptype_simple>")])
1861
;; Vector floating-point minimum (smin) over the VSX_F modes; emitted as
;; xvmin<VSs>.
1862 (define_insn "*vsx_smin<mode>3"
1863   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1864         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1865                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1866   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1867   "xvmin<VSs> %x0,%x1,%x2"
1868   [(set_attr "type" "<VStype_simple>")
1869    (set_attr "fp_type" "<VSfptype_simple>")])
1870
;; Vector floating-point square root over the VSX_F modes; emitted as
;; xvsqrt<VSs>, with the dedicated <VStype_sqrt>/<VSfptype_sqrt>
;; scheduling attributes.
1871 (define_insn "*vsx_sqrt<mode>2"
1872   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1873         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1874   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1875   "xvsqrt<VSs> %x0,%x1"
1876   [(set_attr "type" "<VStype_sqrt>")
1877    (set_attr "fp_type" "<VSfptype_sqrt>")])
1878
;; UNSPEC_RSQRT applied to a single VSX_F operand; emitted as
;; xvrsqrte<VSs> (the VSX vector reciprocal-square-root-estimate
;; instruction).
1879 (define_insn "*vsx_rsqrte<mode>2"
1880   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1881         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1882                       UNSPEC_RSQRT))]
1883   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1884   "xvrsqrte<VSs> %x0,%x1"
1885   [(set_attr "type" "<VStype_simple>")
1886    (set_attr "fp_type" "<VSfptype_simple>")])
1887
1888 ;; *tsqrt* returning the fg flag
;; Two-step expansion over the VSX_B modes: first set a fresh CCFP pseudo
;; (operand 2, created in the preparation code) from UNSPEC_VSX_TSQRT of
;; operand 1, then set SI operand 0 to the GT test of that CCFP value
;; against zero.
1889 (define_expand "vsx_tsqrt<mode>2_fg"
1890   [(set (match_dup 2)
1891         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1892                      UNSPEC_VSX_TSQRT))
1893    (set (match_operand:SI 0 "gpc_reg_operand")
1894         (gt:SI (match_dup 2)
1895                (const_int 0)))]
1896   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1897 {
       /* Allocate the condition-register pseudo referenced by match_dup 2.  */
1898   operands[2] = gen_reg_rtx (CCFPmode);
1899 })
1900
1901 ;; *tsqrt* returning the fe flag
;; Two-step expansion over the VSX_B modes: first set a fresh CCFP pseudo
;; (operand 2, created in the preparation code) from UNSPEC_VSX_TSQRT of
;; operand 1, then set SI operand 0 to the EQ test of that CCFP value
;; against zero.
1902 (define_expand "vsx_tsqrt<mode>2_fe"
1903   [(set (match_dup 2)
1904         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1905                      UNSPEC_VSX_TSQRT))
1906    (set (match_operand:SI 0 "gpc_reg_operand")
1907         (eq:SI (match_dup 2)
1908                (const_int 0)))]
1909   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1910 {
       /* Allocate the condition-register pseudo referenced by match_dup 2.  */
1911   operands[2] = gen_reg_rtx (CCFPmode);
1912 })
1913
;; The instruction behind the tsqrt expanders: sets a CCFP condition
;; register (operand 0) from UNSPEC_VSX_TSQRT of one VSX_B operand.
;; Emitted as x<VSv>tsqrt<VSs>.
1914 (define_insn "*vsx_tsqrt<mode>2_internal"
1915   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1916         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1917                      UNSPEC_VSX_TSQRT))]
1918   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1919   "x<VSv>tsqrt<VSs> %0,%x1"
1920   [(set_attr "type" "<VStype_simple>")
1921    (set_attr "fp_type" "<VSfptype_simple>")])
1922
1923 ;; Fused vector multiply/add instructions.  Support the classical Altivec
1924 ;; versions of fma, which allow the target to be a separate register from the
1925 ;; 3 inputs.  Under VSX, the target must be either the addend or one of the
1926 ;; multiply operands.
1927
;; V4SF fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; Alternatives 0 and 2 tie the addend (operand 3) to the output and use
;; xvmaddasp; alternatives 1 and 3 tie operand 2 to the output and use
;; xvmaddmsp.  The last alternative places every operand in a "v"
;; register and uses the four-operand vmaddfp, so no tie is needed.
1928 (define_insn "*vsx_fmav4sf4"
1929   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1930         (fma:V4SF
1931           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1932           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1933           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1934   "VECTOR_UNIT_VSX_P (V4SFmode)"
1935   "@
1936    xvmaddasp %x0,%x1,%x2
1937    xvmaddmsp %x0,%x1,%x3
1938    xvmaddasp %x0,%x1,%x2
1939    xvmaddmsp %x0,%x1,%x3
1940    vmaddfp %0,%1,%2,%3"
1941   [(set_attr "type" "vecfloat")])
1942
;; V2DF fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; Alternatives 0 and 2 tie the addend (operand 3) to the output and use
;; xvmaddadp; alternatives 1 and 3 tie operand 2 to the output and use
;; xvmaddmdp.
1943 (define_insn "*vsx_fmav2df4"
1944   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1945         (fma:V2DF
1946           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1947           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1948           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1949   "VECTOR_UNIT_VSX_P (V2DFmode)"
1950   "@
1951    xvmaddadp %x0,%x1,%x2
1952    xvmaddmdp %x0,%x1,%x3
1953    xvmaddadp %x0,%x1,%x2
1954    xvmaddmdp %x0,%x1,%x3"
1955   [(set_attr "type" "vecdouble")])
1956
;; Fused multiply-subtract over the VSX_F modes, matched as an fma whose
;; third operand is negated: operand 0 = operand 1 * operand 2 - operand 3.
;; The "a" mnemonic is used when operand 3 is tied to the output and the
;; "m" mnemonic when operand 2 is tied to the output.
1957 (define_insn "*vsx_fms<mode>4"
1958   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1959         (fma:VSX_F
1960           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1961           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1962           (neg:VSX_F
1963             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1964   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1965   "@
1966    xvmsuba<VSs> %x0,%x1,%x2
1967    xvmsubm<VSs> %x0,%x1,%x3
1968    xvmsuba<VSs> %x0,%x1,%x2
1969    xvmsubm<VSs> %x0,%x1,%x3"
1970   [(set_attr "type" "<VStype_mul>")])
1971
;; Negated fused multiply-add over the VSX_F modes, matched as
;; (neg (fma op1 op2 op3)).  The "a" mnemonic is used when operand 3 is
;; tied to the output and the "m" mnemonic when operand 2 is tied to the
;; output.
1972 (define_insn "*vsx_nfma<mode>4"
1973   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1974         (neg:VSX_F
1975          (fma:VSX_F
1976           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1977           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1978           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1979   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1980   "@
1981    xvnmadda<VSs> %x0,%x1,%x2
1982    xvnmaddm<VSs> %x0,%x1,%x3
1983    xvnmadda<VSs> %x0,%x1,%x2
1984    xvnmaddm<VSs> %x0,%x1,%x3"
1985   [(set_attr "type" "<VStype_mul>")
1986    (set_attr "fp_type" "<VSfptype_mul>")])
1987
;; V4SF negated fused multiply-subtract, matched as
;; (neg (fma op1 op2 (neg op3))).  Alternatives 0 and 2 tie operand 3 to
;; the output (xvnmsubasp); alternatives 1 and 3 tie operand 2 to the
;; output (xvnmsubmsp).  The last alternative places every operand in a
;; "v" register and uses the four-operand vnmsubfp.
1988 (define_insn "*vsx_nfmsv4sf4"
1989   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1990         (neg:V4SF
1991          (fma:V4SF
1992            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1993            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1994            (neg:V4SF
1995              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1996   "VECTOR_UNIT_VSX_P (V4SFmode)"
1997   "@
1998    xvnmsubasp %x0,%x1,%x2
1999    xvnmsubmsp %x0,%x1,%x3
2000    xvnmsubasp %x0,%x1,%x2
2001    xvnmsubmsp %x0,%x1,%x3
2002    vnmsubfp %0,%1,%2,%3"
2003   [(set_attr "type" "vecfloat")])
2004
;; V2DF negated fused multiply-subtract, matched as
;; (neg (fma op1 op2 (neg op3))).  Alternatives 0 and 2 tie operand 3 to
;; the output (xvnmsubadp); alternatives 1 and 3 tie operand 2 to the
;; output (xvnmsubmdp).
2005 (define_insn "*vsx_nfmsv2df4"
2006   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
2007         (neg:V2DF
2008          (fma:V2DF
2009            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
2010            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
2011            (neg:V2DF
2012              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
2013   "VECTOR_UNIT_VSX_P (V2DFmode)"
2014   "@
2015    xvnmsubadp %x0,%x1,%x2
2016    xvnmsubmdp %x0,%x1,%x3
2017    xvnmsubadp %x0,%x1,%x2
2018    xvnmsubmdp %x0,%x1,%x3"
2019   [(set_attr "type" "vecdouble")])
2020
2021 ;; Vector conditional expressions (no scalar version for these instructions)
;; Vector compare-equal over the VSX_F modes, producing a vector result in
;; operand 0; emitted as xvcmpeq<VSs>.
2022 (define_insn "vsx_eq<mode>"
2023   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2024         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2025                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2026   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2027   "xvcmpeq<VSs> %x0,%x1,%x2"
2028   [(set_attr "type" "<VStype_simple>")
2029    (set_attr "fp_type" "<VSfptype_simple>")])
2030
;; Vector compare-greater-than over the VSX_F modes, producing a vector
;; result in operand 0; emitted as xvcmpgt<VSs>.
2031 (define_insn "vsx_gt<mode>"
2032   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2033         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2034                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2035   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2036   "xvcmpgt<VSs> %x0,%x1,%x2"
2037   [(set_attr "type" "<VStype_simple>")
2038    (set_attr "fp_type" "<VSfptype_simple>")])
2039
;; Vector compare-greater-or-equal over the VSX_F modes, producing a
;; vector result in operand 0; emitted as xvcmpge<VSs>.
2040 (define_insn "*vsx_ge<mode>"
2041   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2042         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2043                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2044   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2045   "xvcmpge<VSs> %x0,%x1,%x2"
2046   [(set_attr "type" "<VStype_simple>")
2047    (set_attr "fp_type" "<VSfptype_simple>")])
2048
2049 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2050 ;; indicate a combined status
;; Predicate form of compare-equal: one instruction performs two sets, the
;; hard register CR6 (via UNSPEC_PREDICATE wrapping the eq comparison) and
;; the vector result in operand 0.  Emitted as the record form
;; "xvcmpeq<VSs>.".
2051 (define_insn "*vsx_eq_<mode>_p"
2052   [(set (reg:CC CR6_REGNO)
2053         (unspec:CC
2054          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2055                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2056          UNSPEC_PREDICATE))
2057    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2058         (eq:VSX_F (match_dup 1)
2059                   (match_dup 2)))]
2060   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2061   "xvcmpeq<VSs>. %x0,%x1,%x2"
2062   [(set_attr "type" "<VStype_simple>")])
2063
;; Predicate form of compare-greater-than: one instruction performs two
;; sets, the hard register CR6 (via UNSPEC_PREDICATE wrapping the gt
;; comparison) and the vector result in operand 0.  Emitted as the record
;; form "xvcmpgt<VSs>.".
2064 (define_insn "*vsx_gt_<mode>_p"
2065   [(set (reg:CC CR6_REGNO)
2066         (unspec:CC
2067          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2068                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2069          UNSPEC_PREDICATE))
2070    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2071         (gt:VSX_F (match_dup 1)
2072                   (match_dup 2)))]
2073   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2074   "xvcmpgt<VSs>. %x0,%x1,%x2"
2075   [(set_attr "type" "<VStype_simple>")])
2076
;; Predicate form of compare-greater-or-equal: one instruction performs
;; two sets, the hard register CR6 (via UNSPEC_PREDICATE wrapping the ge
;; comparison) and the vector result in operand 0.  Emitted as the record
;; form "xvcmpge<VSs>.".
2077 (define_insn "*vsx_ge_<mode>_p"
2078   [(set (reg:CC CR6_REGNO)
2079         (unspec:CC
2080          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2081                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2082          UNSPEC_PREDICATE))
2083    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2084         (ge:VSX_F (match_dup 1)
2085                   (match_dup 2)))]
2086   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2087   "xvcmpge<VSs>. %x0,%x1,%x2"
2088   [(set_attr "type" "<VStype_simple>")])
2089
2090 ;; Vector select
;; Vector select over the VSX_L modes: where mask operand 1 compares
;; not-equal (in CC mode) to the zero constant operand 4, take operand 2,
;; otherwise operand 3.  In the xxsel template the operands appear in the
;; order 3, 2, 1 (else-value, then-value, mask).
2091 (define_insn "*vsx_xxsel<mode>"
2092   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2093         (if_then_else:VSX_L
2094          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2095                 (match_operand:VSX_L 4 "zero_constant" ""))
2096          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2097          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2098   "VECTOR_MEM_VSX_P (<MODE>mode)"
2099   "xxsel %x0,%x3,%x2,%x1"
2100   [(set_attr "type" "vecmove")])
2101
;; Same vector select as *vsx_xxsel<mode>, except that the mask comparison
;; against the zero constant is expressed in CCUNS mode instead of CC.
;; The emitted xxsel instruction and its operand order are identical.
2102 (define_insn "*vsx_xxsel<mode>_uns"
2103   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2104         (if_then_else:VSX_L
2105          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2106                    (match_operand:VSX_L 4 "zero_constant" ""))
2107          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2108          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2109   "VECTOR_MEM_VSX_P (<MODE>mode)"
2110   "xxsel %x0,%x3,%x2,%x1"
2111   [(set_attr "type" "vecmove")])
2112
2113 ;; Copy sign
;; UNSPEC_COPYSIGN of operands 1 and 2 over the VSX_F modes, emitted as
;; xvcpsgn<VSs>.  Note that the assembler template lists operand 2 before
;; operand 1.
2114 (define_insn "vsx_copysign<mode>3"
2115   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2116         (unspec:VSX_F
2117          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2118           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2119          UNSPEC_COPYSIGN))]
2120   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2121   "xvcpsgn<VSs> %x0,%x2,%x1"
2122   [(set_attr "type" "<VStype_simple>")
2123    (set_attr "fp_type" "<VSfptype_simple>")])
2124
2125 ;; For the conversions, limit the register class for the integer value to be
2126 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2127 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2128 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2129 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2130 ;; in allowing virtual registers.
;; Signed integer vector to floating-point vector conversion: the source
;; uses the <VSI> integer mode paired with each VSX_F mode.  Emitted as
;; xvcvsx<VSc><VSs>.
2131 (define_insn "vsx_float<VSi><mode>2"
2132   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2133         (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2134   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2135   "xvcvsx<VSc><VSs> %x0,%x1"
2136   [(set_attr "type" "<VStype_simple>")
2137    (set_attr "fp_type" "<VSfptype_simple>")])
2138
;; Unsigned integer vector to floating-point vector conversion: the source
;; uses the <VSI> integer mode paired with each VSX_F mode.  Emitted as
;; xvcvux<VSc><VSs>.
2139 (define_insn "vsx_floatuns<VSi><mode>2"
2140   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2141         (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2142   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2143   "xvcvux<VSc><VSs> %x0,%x1"
2144   [(set_attr "type" "<VStype_simple>")
2145    (set_attr "fp_type" "<VSfptype_simple>")])
2146
;; Floating-point vector to signed integer vector conversion (RTL "fix"):
;; the destination uses the <VSI> integer mode paired with each VSX_F
;; mode.  Emitted as x<VSv>cv<VSs>sx<VSc>s.
2147 (define_insn "vsx_fix_trunc<mode><VSi>2"
2148   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2149         (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2150   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2151   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2152   [(set_attr "type" "<VStype_simple>")
2153    (set_attr "fp_type" "<VSfptype_simple>")])
2154
;; Floating-point vector to unsigned integer vector conversion (RTL
;; "unsigned_fix"): the destination uses the <VSI> integer mode paired
;; with each VSX_F mode.  Emitted as x<VSv>cv<VSs>ux<VSc>s.
2155 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2156   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2157         (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2158   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2159   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2160   [(set_attr "type" "<VStype_simple>")
2161    (set_attr "fp_type" "<VSfptype_simple>")])
2162
2163 ;; Math rounding functions
;; UNSPEC_VSX_ROUND_I on one operand, defined for the scalar and vector
;; modes of VSX_B (DF, V4SF, V2DF).  Both the pattern name and the
;; mnemonic are built from the <VSv> and <VSs> mode attributes:
;; x<VSv>r<VSs>i.
2164 (define_insn "vsx_x<VSv>r<VSs>i"
2165   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2166         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2167                       UNSPEC_VSX_ROUND_I))]
2168   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2169   "x<VSv>r<VSs>i %x0,%x1"
2170   [(set_attr "type" "<VStype_simple>")
2171    (set_attr "fp_type" "<VSfptype_simple>")])
2172
;; UNSPEC_VSX_ROUND_IC on one operand, defined for the scalar and vector
;; modes of VSX_B (DF, V4SF, V2DF).  Both the pattern name and the
;; mnemonic are built from the <VSv> and <VSs> mode attributes:
;; x<VSv>r<VSs>ic.
2173 (define_insn "vsx_x<VSv>r<VSs>ic"
2174   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2175         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2176                       UNSPEC_VSX_ROUND_IC))]
2177   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2178   "x<VSv>r<VSs>ic %x0,%x1"
2179   [(set_attr "type" "<VStype_simple>")
2180    (set_attr "fp_type" "<VSfptype_simple>")])
2181
;; RTL "fix" with a floating-point result mode (VSX_F in, VSX_F out), i.e.
;; the value stays in floating-point format.  Emitted as xvr<VSs>iz.
2182 (define_insn "vsx_btrunc<mode>2"
2183   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2184         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2185   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2186   "xvr<VSs>iz %x0,%x1"
2187   [(set_attr "type" "<VStype_simple>")
2188    (set_attr "fp_type" "<VSfptype_simple>")])
2189
;; UNSPEC_FRIZ on one operand over the VSX_B modes (DF, V4SF, V2DF), so
;; unlike vsx_btrunc<mode>2 it also covers the scalar DF case.  Emitted as
;; x<VSv>r<VSs>iz.
2190 (define_insn "*vsx_b2trunc<mode>2"
2191   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2192         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2193                       UNSPEC_FRIZ))]
2194   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2195   "x<VSv>r<VSs>iz %x0,%x1"
2196   [(set_attr "type" "<VStype_simple>")
2197    (set_attr "fp_type" "<VSfptype_simple>")])
2198
;; Vector floor over the VSX_F modes, represented as UNSPEC_FRIM and
;; emitted as xvr<VSs>im.
2199 (define_insn "vsx_floor<mode>2"
2200   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2201         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2202                       UNSPEC_FRIM))]
2203   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2204   "xvr<VSs>im %x0,%x1"
2205   [(set_attr "type" "<VStype_simple>")
2206    (set_attr "fp_type" "<VSfptype_simple>")])
2207
;; Vector ceiling over the VSX_F modes, represented as UNSPEC_FRIP and
;; emitted as xvr<VSs>ip.
2208 (define_insn "vsx_ceil<mode>2"
2209   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2210         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2211                       UNSPEC_FRIP))]
2212   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2213   "xvr<VSs>ip %x0,%x1"
2214   [(set_attr "type" "<VStype_simple>")
2215    (set_attr "fp_type" "<VSfptype_simple>")])
2216
2217 \f
2218 ;; VSX convert to/from double vector
2219
2220 ;; Convert between single and double precision
2221 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2222 ;; scalar single precision instructions internally use the double format.
2223 ;; Prefer the altivec registers, since we likely will need to do a vperm
;; Generic single/double precision conversion over the VSX_SPDP modes,
;; represented as UNSPEC_VSX_CVSPDP.  The pattern name, the mnemonic
;; (<VS_spdp_insn>), the result mode (<VS_spdp_res>) and the scheduling
;; type (<VS_spdp_type>) are all supplied per mode by mode attributes.
2224 (define_insn "vsx_<VS_spdp_insn>"
2225   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2226         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2227                               UNSPEC_VSX_CVSPDP))]
2228   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2229   "<VS_spdp_insn> %x0,%x1"
2230   [(set_attr "type" "<VS_spdp_type>")])
2231
2232 ;; xscvspdp, represent the scalar SF type as V4SF
;; Produces a DF result from a V4SF source via UNSPEC_VSX_CVSPDP; emitted
;; as xscvspdp.
2233 (define_insn "vsx_xscvspdp"
2234   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2235         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2236                    UNSPEC_VSX_CVSPDP))]
2237   "VECTOR_UNIT_VSX_P (V4SFmode)"
2238   "xscvspdp %x0,%x1"
2239   [(set_attr "type" "fp")])
2240
2241 ;; Same as vsx_xscvspdp, but use SF as the type
;; Same xscvspdp instruction and V4SF source as vsx_xscvspdp, but the
;; result operand is typed SF (constraint "ww") instead of DF.
2242 (define_insn "vsx_xscvspdp_scalar2"
2243   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2244         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2245                    UNSPEC_VSX_CVSPDP))]
2246   "VECTOR_UNIT_VSX_P (V4SFmode)"
2247   "xscvspdp %x0,%x1"
2248   [(set_attr "type" "fp")])
2249
2250 ;; Generate xvcvhpsp instruction
;; The source is typed as V16QI and the result as V4SF; the conversion is
;; represented as UNSPEC_VSX_CVHPSP.  Only available with TARGET_P9_VECTOR.
2251 (define_insn "vsx_xvcvhpsp"
2252   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2253         (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2254                      UNSPEC_VSX_CVHPSP))]
2255   "TARGET_P9_VECTOR"
2256   "xvcvhpsp %x0,%x1"
2257   [(set_attr "type" "vecfloat")])
2258
2259 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2260 ;; format of scalars is actually DF.
;; Note that this pattern reuses UNSPEC_VSX_CVSPDP; it differs from the
;; other users of that unspec in this file by its operand modes
;; (SF source, V4SF result).
2261 (define_insn "vsx_xscvdpsp_scalar"
2262   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2263         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2264                      UNSPEC_VSX_CVSPDP))]
2265   "VECTOR_UNIT_VSX_P (V4SFmode)"
2266   "xscvdpsp %x0,%x1"
2267   [(set_attr "type" "fp")])
2268
2269 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
;; This is the DF -> V4SF direction (xscvdpspn), represented as
;; UNSPEC_VSX_CVDPSPN and gated on TARGET_XSCVDPSPN.
2270 (define_insn "vsx_xscvdpspn"
2271   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2272         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2273                      UNSPEC_VSX_CVDPSPN))]
2274   "TARGET_XSCVDPSPN"
2275   "xscvdpspn %x0,%x1"
2276   [(set_attr "type" "fp")])
2277
;; The V4SF -> DF direction of the non-signalling conversion pair:
;; emits xscvspdpn, represented as UNSPEC_VSX_CVSPDPN and gated on
;; TARGET_XSCVSPDPN.
2278 (define_insn "vsx_xscvspdpn"
2279   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2280         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2281                    UNSPEC_VSX_CVSPDPN))]
2282   "TARGET_XSCVSPDPN"
2283   "xscvspdpn %x0,%x1"
2284   [(set_attr "type" "fp")])
2285
;; Variant of vsx_xscvdpspn whose source is declared as an SF scalar
;; (constraint "ww") instead of DF.  Same instruction, same unspec
;; (UNSPEC_VSX_CVDPSPN), same TARGET_XSCVDPSPN condition.
2286 (define_insn "vsx_xscvdpspn_scalar"
2287   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2288         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2289                      UNSPEC_VSX_CVDPSPN))]
2290   "TARGET_XSCVDPSPN"
2291   "xscvdpspn %x0,%x1"
2292   [(set_attr "type" "fp")])
2293
2294 ;; Used by direct move to move a SFmode value from GPR to VSX register
;; Both operands are declared SF and may live in any VSX register ("wa");
;; the instruction emitted is xscvspdpn (UNSPEC_VSX_CVSPDPN).
2295 (define_insn "vsx_xscvspdpn_directmove"
2296   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2297         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2298                    UNSPEC_VSX_CVSPDPN))]
2299   "TARGET_XSCVSPDPN"
2300   "xscvspdpn %x0,%x1"
2301   [(set_attr "type" "fp")])
2302
2303 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2304
;; Signed V2DI -> V2DF conversion followed by an optional scale step.
;; Operand 2 is an immediate scale amount: the conversion is emitted
;; first, and when the amount is non-zero the result is rescaled in place
;; by rs6000_scale_v2df using the negated amount.
2305 (define_expand "vsx_xvcvsxddp_scale"
2306   [(match_operand:V2DF 0 "vsx_register_operand")
2307    (match_operand:V2DI 1 "vsx_register_operand")
2308    (match_operand:QI 2 "immediate_operand")]
2309   "VECTOR_UNIT_VSX_P (V2DFmode)"
2310 {
2311   rtx result = operands[0];
2312   int scale_amount = INTVAL (operands[2]);
2313
2314   emit_insn (gen_vsx_xvcvsxddp (result, operands[1]));
2315   if (scale_amount != 0)
2316     rs6000_scale_v2df (result, result, -scale_amount);
2317   DONE;
2318 })
2319
;; V2DI -> V2DF conversion emitted as xvcvsxddp and represented as
;; UNSPEC_VSX_XVCVSXDDP.  This is the instruction used by the
;; vsx_xvcvsxddp_scale expander.
2320 (define_insn "vsx_xvcvsxddp"
2321   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2322         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2323                      UNSPEC_VSX_XVCVSXDDP))]
2324   "VECTOR_UNIT_VSX_P (V2DFmode)"
2325   "xvcvsxddp %x0,%x1"
2326   [(set_attr "type" "vecdouble")])
2327
;; Unsigned counterpart of vsx_xvcvsxddp_scale: emit the xvcvuxddp
;; conversion, then, for a non-zero immediate in operand 2, rescale the
;; V2DF result in place by rs6000_scale_v2df with the negated amount.
2328 (define_expand "vsx_xvcvuxddp_scale"
2329   [(match_operand:V2DF 0 "vsx_register_operand")
2330    (match_operand:V2DI 1 "vsx_register_operand")
2331    (match_operand:QI 2 "immediate_operand")]
2332   "VECTOR_UNIT_VSX_P (V2DFmode)"
2333 {
2334   int scale_amount = INTVAL (operands[2]);
2335   rtx result = operands[0];
2336
2337   emit_insn (gen_vsx_xvcvuxddp (result, operands[1]));
2338   if (scale_amount != 0)
2339     rs6000_scale_v2df (result, result, -scale_amount);
2340   DONE;
2341 })
2342
;; V2DI -> V2DF conversion emitted as xvcvuxddp and represented as
;; UNSPEC_VSX_XVCVUXDDP.  This is the instruction used by the
;; vsx_xvcvuxddp_scale expander.
2343 (define_insn "vsx_xvcvuxddp"
2344   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2345         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2346                      UNSPEC_VSX_XVCVUXDDP))]
2347   "VECTOR_UNIT_VSX_P (V2DFmode)"
2348   "xvcvuxddp %x0,%x1"
2349   [(set_attr "type" "vecdouble")])
2350
;; Optional scale followed by a V2DF -> V2DI conversion (xvcvdpsxds).
;; Operand 2 is an immediate scale amount.  When it is non-zero the input
;; is first scaled into a new pseudo by rs6000_scale_v2df, so operand 1
;; itself is left untouched; when it is zero the input is converted
;; directly.
2351 (define_expand "vsx_xvcvdpsxds_scale"
2352   [(match_operand:V2DI 0 "vsx_register_operand")
2353    (match_operand:V2DF 1 "vsx_register_operand")
2354    (match_operand:QI 2 "immediate_operand")]
2355   "VECTOR_UNIT_VSX_P (V2DFmode)"
2356 {
2357   int scale_amount = INTVAL (operands[2]);
2358   rtx to_convert = operands[1];
2359
2360   /* Only materialize a scaled copy when there is something to scale;
2361      the conversion below then reads that copy instead of operand 1.  */
2362   if (scale_amount != 0)
2363     {
2364       rtx scaled = gen_reg_rtx (V2DFmode);
2365       rs6000_scale_v2df (scaled, to_convert, scale_amount);
2366       to_convert = scaled;
2367     }
2368   emit_insn (gen_vsx_xvcvdpsxds (operands[0], to_convert));
2369   DONE;
2370 })
2371
2372 ;; convert vector of 64-bit floating point numbers to vector of
2373 ;; 64-bit signed integer
;; Emitted as xvcvdpsxds and represented as UNSPEC_VSX_XVCVDPSXDS; used by
;; the vsx_xvcvdpsxds_scale expander.
2374 (define_insn "vsx_xvcvdpsxds"
2375   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2376         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2377                      UNSPEC_VSX_XVCVDPSXDS))]
2378   "VECTOR_UNIT_VSX_P (V2DFmode)"
2379   "xvcvdpsxds %x0,%x1"
2380   [(set_attr "type" "vecdouble")])
2381
2382 ;; convert vector of 32-bit floating point numbers to vector of
2383 ;; 32-bit signed integer
;; Emitted as xvcvspsxws and represented as UNSPEC_VSX_XVCVSPSXWS.
2384 (define_insn "vsx_xvcvspsxws"
2385   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2386         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2387                      UNSPEC_VSX_XVCVSPSXWS))]
2388   "VECTOR_UNIT_VSX_P (V4SFmode)"
2389   "xvcvspsxws %x0,%x1"
2390   [(set_attr "type" "vecfloat")])
2391
2392 ;; convert vector of 64-bit floating point numbers to vector of
2393 ;; 64-bit unsigned integer
;; Operand 2 is an immediate scale amount.  When it is non-zero the input
;; is first scaled into a new pseudo by rs6000_scale_v2df and that copy is
;; converted; otherwise operand 1 is converted directly (xvcvdpuxds).
2394 (define_expand "vsx_xvcvdpuxds_scale"
2395   [(match_operand:V2DI 0 "vsx_register_operand")
2396    (match_operand:V2DF 1 "vsx_register_operand")
2397    (match_operand:QI 2 "immediate_operand")]
2398   "VECTOR_UNIT_VSX_P (V2DFmode)"
2399 {
2400   int scale_amount = INTVAL (operands[2]);
2401   rtx to_convert = operands[1];
2402
2403   /* Only materialize a scaled copy when there is something to scale;
2404      the conversion below then reads that copy instead of operand 1.  */
2405   if (scale_amount != 0)
2406     {
2407       rtx scaled = gen_reg_rtx (V2DFmode);
2408       rs6000_scale_v2df (scaled, to_convert, scale_amount);
2409       to_convert = scaled;
2410     }
2411   emit_insn (gen_vsx_xvcvdpuxds (operands[0], to_convert));
2412   DONE;
2413 })
2414
2415 ;; convert vector of 32-bit floating point numbers to vector of
2416 ;; 32-bit unsigned integer
;; Emitted as xvcvspuxws.
;; NOTE(review): the unspec used here is UNSPEC_VSX_XVCVSPSXWS, the same
;; unspec (with the same operand modes) as the signed vsx_xvcvspsxws
;; pattern, so the two patterns have identical RTL.  A distinct unsigned
;; unspec would be expected; this looks unintended -- confirm.
2417 (define_insn "vsx_xvcvspuxws"
2418   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2419         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2420                      UNSPEC_VSX_XVCVSPSXWS))]
2421   "VECTOR_UNIT_VSX_P (V4SFmode)"
2422   "xvcvspuxws %x0,%x1"
2423   [(set_attr "type" "vecfloat")])
2424
;; V2DF -> V2DI conversion emitted as xvcvdpuxds and represented as
;; UNSPEC_VSX_XVCVDPUXDS; used by the vsx_xvcvdpuxds_scale expander.
2425 (define_insn "vsx_xvcvdpuxds"
2426   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2427         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2428                      UNSPEC_VSX_XVCVDPUXDS))]
2429   "VECTOR_UNIT_VSX_P (V2DFmode)"
2430   "xvcvdpuxds %x0,%x1"
2431   [(set_attr "type" "vecdouble")])
2432
2433 ;; Convert from 64-bit to 32-bit types
2434 ;; Note, favor the Altivec registers since the usual use of these instructions
2435 ;; is in vector converts and we need to use the Altivec vperm instruction.
2436
;; V2DF -> V4SI conversion emitted as xvcvdpsxws (UNSPEC_VSX_CVDPSXWS).
;; The first alternative puts the result in an Altivec register ("v"),
;; the second, disparaged by '?', accepts any VSX register.  Used by the
;; vsignedo_v2df / vsignede_v2df expanders.
2437 (define_insn "vsx_xvcvdpsxws"
2438   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2439         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2440                      UNSPEC_VSX_CVDPSXWS))]
2441   "VECTOR_UNIT_VSX_P (V2DFmode)"
2442   "xvcvdpsxws %x0,%x1"
2443   [(set_attr "type" "vecdouble")])
2444
;; V2DF -> V4SI conversion emitted as xvcvdpuxws (UNSPEC_VSX_CVDPUXWS),
;; with the same two register alternatives as vsx_xvcvdpsxws.  Used by the
;; vunsignedo_v2df / vunsignede_v2df expanders.
2445 (define_insn "vsx_xvcvdpuxws"
2446   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2447         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2448                      UNSPEC_VSX_CVDPUXWS))]
2449   "VECTOR_UNIT_VSX_P (V2DFmode)"
2450   "xvcvdpuxws %x0,%x1"
2451   [(set_attr "type" "vecdouble")])
2452
;; V2DI -> V4SF conversion emitted as xvcvsxdsp (UNSPEC_VSX_CVSXDSP).
;; Used by the floate<mode> / floato<mode> expanders for V2DI (the
;; VF_sxddp attribute maps V2DI to "sxd").
2453 (define_insn "vsx_xvcvsxdsp"
2454   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2455         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2456                      UNSPEC_VSX_CVSXDSP))]
2457   "VECTOR_UNIT_VSX_P (V2DFmode)"
2458   "xvcvsxdsp %x0,%x1"
2459   [(set_attr "type" "vecfloat")])
2460
;; V2DI -> V4SF conversion emitted as xvcvuxdsp (UNSPEC_VSX_CVUXDSP).
;; Used by the unsfloatev2di / unsfloatov2di expanders.
;; NOTE(review): the "type" attribute is "vecdouble" here but "vecfloat"
;; on the otherwise parallel vsx_xvcvsxdsp -- confirm which is intended.
2461 (define_insn "vsx_xvcvuxdsp"
2462   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2463         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2464                      UNSPEC_VSX_CVUXDSP))]
2465   "VECTOR_UNIT_VSX_P (V2DFmode)"
2466   "xvcvuxdsp %x0,%x1"
2467   [(set_attr "type" "vecdouble")])
2468
;; V2DF -> V4SF conversion emitted as xvcvdpsp (UNSPEC_VSX_XVCDPSP).
;; Note that the pattern name and the unspec name are spelled "xvcdpsp",
;; without the second 'v' of the mnemonic that is actually emitted.  This
;; is the pattern reached by floate<mode> / floato<mode> for V2DF (the
;; VF_sxddp attribute maps V2DF to "dp").
2469 (define_insn "vsx_xvcdpsp"
2470   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2471         (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2472                      UNSPEC_VSX_XVCDPSP))]
2473   "VECTOR_UNIT_VSX_P (V2DFmode)"
2474   "xvcvdpsp %x0,%x1"
2475   [(set_attr "type" "vecdouble")])
2476
2477 ;; Convert from 32-bit to 64-bit types
2478 ;; Provide both vector and scalar targets
;; Vector-target form: V4SI -> V2DF, emitted as xvcvsxwdp and represented
;; as UNSPEC_VSX_CVSXWDP.
2479 (define_insn "vsx_xvcvsxwdp"
2480   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2481         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2482                      UNSPEC_VSX_CVSXWDP))]
2483   "VECTOR_UNIT_VSX_P (V2DFmode)"
2484   "xvcvsxwdp %x0,%x1"
2485   [(set_attr "type" "vecdouble")])
2486
;; Scalar-target form of vsx_xvcvsxwdp: the same instruction and unspec,
;; but the destination is declared as a DF scalar and the condition is
;; simply TARGET_VSX.
2487 (define_insn "vsx_xvcvsxwdp_df"
2488   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2489         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2490                    UNSPEC_VSX_CVSXWDP))]
2491   "TARGET_VSX"
2492   "xvcvsxwdp %x0,%x1"
2493   [(set_attr "type" "vecdouble")])
2494
;; Vector-target form: V4SI -> V2DF, emitted as xvcvuxwdp and represented
;; as UNSPEC_VSX_CVUXWDP.
2495 (define_insn "vsx_xvcvuxwdp"
2496   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2497         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2498                      UNSPEC_VSX_CVUXWDP))]
2499   "VECTOR_UNIT_VSX_P (V2DFmode)"
2500   "xvcvuxwdp %x0,%x1"
2501   [(set_attr "type" "vecdouble")])
2502
;; Scalar-target form of vsx_xvcvuxwdp: the same instruction and unspec,
;; but the destination is declared as a DF scalar and the condition is
;; simply TARGET_VSX.
2503 (define_insn "vsx_xvcvuxwdp_df"
2504   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2505         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2506                    UNSPEC_VSX_CVUXWDP))]
2507   "TARGET_VSX"
2508   "xvcvuxwdp %x0,%x1"
2509   [(set_attr "type" "vecdouble")])
2510
;; V4SF -> V2DI conversion emitted as xvcvspsxds (UNSPEC_VSX_CVSPSXDS).
;; The first alternative puts the result in an Altivec register ("v");
;; the second, disparaged by '?', accepts any VSX register.
2511 (define_insn "vsx_xvcvspsxds"
2512   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2513         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2514                      UNSPEC_VSX_CVSPSXDS))]
2515   "VECTOR_UNIT_VSX_P (V2DFmode)"
2516   "xvcvspsxds %x0,%x1"
2517   [(set_attr "type" "vecdouble")])
2518
;; V4SF -> V2DI conversion emitted as xvcvspuxds (UNSPEC_VSX_CVSPUXDS),
;; with the same two register alternatives as vsx_xvcvspsxds.
2519 (define_insn "vsx_xvcvspuxds"
2520   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2521         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2522                      UNSPEC_VSX_CVSPUXDS))]
2523   "VECTOR_UNIT_VSX_P (V2DFmode)"
2524   "xvcvspuxds %x0,%x1"
2525   [(set_attr "type" "vecdouble")])
2526
;; V4SI -> V4SF conversion emitted as xvcvsxwsp (UNSPEC_VSX_CVSXWSP).
2527 (define_insn "vsx_xvcvsxwsp"
2528   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2529         (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2530                      UNSPEC_VSX_CVSXWSP))]
2531   "VECTOR_UNIT_VSX_P (V4SFmode)"
2532   "xvcvsxwsp %x0,%x1"
2533   [(set_attr "type" "vecfloat")])
2534
;; V4SI -> V4SF conversion emitted as xvcvuxwsp (UNSPEC_VSX_CVUXWSP).
2535 (define_insn "vsx_xvcvuxwsp"
2536   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2537         (unspec:V4SF[(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2538                     UNSPEC_VSX_CVUXWSP))]
2539   "VECTOR_UNIT_VSX_P (V4SFmode)"
2540   "xvcvuxwsp %x0,%x1"
2541   [(set_attr "type" "vecfloat")])
2542
2543 ;; Generate float2 double
2544 ;; Convert two V2DF vectors (operands 1 and 2) into one V4SF vector
;; (operand 0).  No RTL template is emitted directly; the expander hands
;; its operands to rs6000_generate_float2_double_code and finishes.
2545 (define_expand "float2_v2df"
2546   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2547    (use (match_operand:V2DF 1 "register_operand" "wa"))
2548    (use (match_operand:V2DF 2 "register_operand" "wa"))]
2549  "VECTOR_UNIT_VSX_P (V4SFmode)"
2550 {
2551   rtx dst = operands[0];
2552   rtx src_a = operands[1];
2553   rtx src_b = operands[2];
2554
2555   /* The helper emits the whole conversion sequence; this expander
2556      only forwards its operands.  */
2557   rs6000_generate_float2_double_code (dst, src_a, src_b);
2558   DONE;
2559 })
2560
2561 ;; Generate float2
2562 ;; Convert two V2DI vectors of signed long long (operands 1 and 2) into
;; one V4SF vector (operand 0).  The work is delegated to
;; rs6000_generate_float2_code.
2563 (define_expand "float2_v2di"
2564   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2565    (use (match_operand:V2DI 1 "register_operand" "wa"))
2566    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2567  "VECTOR_UNIT_VSX_P (V4SFmode)"
2568 {
2569   rtx dst = operands[0];
2570   rtx src_a = operands[1];
2571   rtx src_b = operands[2];
2572
2573   /* The leading flag is presumably "signed conversion"; this is the
2574      signed expander, so it is passed as true.  */
2575   rs6000_generate_float2_code (true, dst, src_a, src_b);
2576   DONE;
2577 })
2578
2579 ;; Generate uns_float2
2580 ;; convert two long long unsigned ints to float
;; Operands 1 and 2 are V2DI vectors of unsigned long long; operand 0 is
;; the V4SF result.  The work is delegated to rs6000_generate_float2_code,
;; whose first argument selects a signed conversion.  It must be false
;; here: with true this expander would be identical to the signed
;; float2_v2di and would treat its inputs as signed values.
2581 (define_expand "uns_float2_v2di"
2582   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2583    (use (match_operand:V2DI 1 "register_operand" "wa"))
2584    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2585  "VECTOR_UNIT_VSX_P (V4SFmode)"
2586 {
2587   rtx rtx_src1, rtx_src2, rtx_dst;
2588
2589   rtx_dst = operands[0];
2590   rtx_src1 = operands[1];
2591   rtx_src2 = operands[2];
2592
2593   rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2594   DONE;
2595 })
2596
2597 ;; Generate floate
2598 ;; Convert double or long long signed to float.
2599 ;; (Only even words are valid, BE numbering)
;; The conversion instruction is selected by the VF_sxddp attribute
;; (gen_vsx_xvcv<VF_sxddp>sp).  With big-endian element order its result
;; is additionally shifted by one word into a scratch-free destination.
2600 (define_expand "floate<mode>"
2601   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2602    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2603   "VECTOR_UNIT_VSX_P (V4SFmode)"
2604 {
2605   if (!VECTOR_ELT_ORDER_BIG)
2606     emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2607   else
2608     {
2609       /* Convert into a scratch register, then shift the pair
2610          (scratch, scratch) left by one word (4 bytes) to put the
2611          even words in the correct location.  */
2612       rtx scratch = gen_reg_rtx (V4SFmode);
2613
2614       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (scratch, operands[1]));
2615       emit_insn (gen_altivec_vsldoi_v4sf (operands[0], scratch,
2616                                           scratch, GEN_INT (4)));
2617     }
2618
2619   DONE;
2620 })
2621
2622 ;; Generate uns_floate
2623 ;; Convert long long unsigned to float.
2624 ;; (Only even words are valid, BE numbering)
;; Same shape as floate<mode>, but always uses the xvcvuxdsp conversion.
2625 (define_expand "unsfloatev2di"
2626   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2627    (use (match_operand:V2DI 1 "register_operand" "wa"))]
2628   "VECTOR_UNIT_VSX_P (V4SFmode)"
2629 {
2630   if (!VECTOR_ELT_ORDER_BIG)
2631     emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2632   else
2633     {
2634       /* Convert into a scratch register, then shift the pair
2635          (scratch, scratch) left by one word (4 bytes) to put the
2636          even words in the correct location.  */
2637       rtx scratch = gen_reg_rtx (V4SFmode);
2638
2639       emit_insn (gen_vsx_xvcvuxdsp (scratch, operands[1]));
2640       emit_insn (gen_altivec_vsldoi_v4sf (operands[0], scratch,
2641                                           scratch, GEN_INT (4)));
2642     }
2643
2644   DONE;
2645 })
2646
2647 ;; Generate floato
2648 ;; Convert double or long long signed to float.
2649 ;; (Only odd words are valid, BE numbering)
;; Mirror image of floate<mode>: here the extra one-word shift is needed
;; when the element order is NOT big endian.
2650 (define_expand "floato<mode>"
2651   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2652    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2653   "VECTOR_UNIT_VSX_P (V4SFmode)"
2654 {
2655   if (!VECTOR_ELT_ORDER_BIG)
2656     {
2657       /* Convert into a scratch register, then shift the pair
2658          (scratch, scratch) left by one word (4 bytes) to put the
2659          odd words in the correct location.  */
2660       rtx scratch = gen_reg_rtx (V4SFmode);
2661
2662       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (scratch, operands[1]));
2663       emit_insn (gen_altivec_vsldoi_v4sf (operands[0], scratch,
2664                                           scratch, GEN_INT (4)));
2665     }
2666   else
2667     emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2668   DONE;
2669 })
2670
2671 ;; Generate uns_floato
2672 ;; Convert long long unsigned to float.
2673 ;; (Only odd words are valid, BE numbering)
;; Same shape as floato<mode>, but always uses the xvcvuxdsp conversion.
2674 (define_expand "unsfloatov2di"
2675  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2676   (use (match_operand:V2DI 1 "register_operand" "wa"))]
2677  "VECTOR_UNIT_VSX_P (V4SFmode)"
2678 {
2679   if (!VECTOR_ELT_ORDER_BIG)
2680     {
2681       /* Convert into a scratch register, then shift the pair
2682          (scratch, scratch) left by one word (4 bytes) to put the
2683          odd words in the correct location.  */
2684       rtx scratch = gen_reg_rtx (V4SFmode);
2685
2686       emit_insn (gen_vsx_xvcvuxdsp (scratch, operands[1]));
2687       emit_insn (gen_altivec_vsldoi_v4sf (operands[0], scratch,
2688                                           scratch, GEN_INT (4)));
2689     }
2690   else
2691     emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2692   DONE;
2693 })
2694
2695 ;; Generate vsigned2
2696 ;; Convert two V2DF vectors (operands 1 and 2) into one V4SI vector of
;; signed integers (operand 0).  The work is delegated to
;; rs6000_generate_vsigned2_code with its signed_convert flag set.
2697 (define_expand "vsigned2_v2df"
2698   [(match_operand:V4SI 0 "register_operand" "=wa")
2699    (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2700                  (match_operand:V2DF 2 "register_operand" "wa")]
2701                 UNSPEC_VSX_VSIGNED2)]
2702   "TARGET_VSX"
2703 {
2704   /* The flag selects the signed flavour of the conversion;
2705      vunsigned2_v2df is the same expander with the flag cleared.  */
2706   bool signed_convert = true;
2707   rtx dst = operands[0];
2708   rtx src_a = operands[1];
2709   rtx src_b = operands[2];
2710
2711   rs6000_generate_vsigned2_code (signed_convert, dst, src_a, src_b);
2712   DONE;
2713 })
2714
2715 ;; Generate vsignedo_v2df
2716 ;; Signed double float to int convert, odd words.
;; Uses xvcvdpsxws; with big-endian element order the converted vector is
;; additionally shifted by 12 bytes so that the values land in the odd
;; words.
2717 (define_expand "vsignedo_v2df"
2718   [(set (match_operand:V4SI 0 "register_operand" "=wa")
2719         (match_operand:V2DF 1 "register_operand" "wa"))]
2720   "TARGET_VSX"
2721 {
2722   if (!VECTOR_ELT_ORDER_BIG)
2723     {
2724       /* Little endian word numbering for the operand is 3 2 1 0.
2725          Result words 3 and 1 are already where they need to be.  */
2726       emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2727     }
2728   else
2729     {
2730       rtx converted = gen_reg_rtx (V4SImode);
2731
2732       emit_insn (gen_vsx_xvcvdpsxws (converted, operands[1]));
2733
2734       /* Big endian word numbering for the words is 0 1 2 3.
2735          Concatenate the converted vector with itself and shift left
2736          by 12 bytes (three words):
2737          0 1 2 3    0 1 2 3  =>  3 0 1 2
2738          Words 1 and 3 are now where they need to be for the result.  */
2739       emit_insn (gen_altivec_vsldoi_v4si (operands[0], converted,
2740                                           converted, GEN_INT (12)));
2741     }
2742
2743   DONE;
2744 }
2745   [(set_attr "type" "veccomplex")])
2746
2747 ;; Generate vsignede_v2df
2748 ;; Signed double float to int convert, even words.
;; Uses xvcvdpsxws; with little-endian element order the converted vector
;; is additionally shifted by 12 bytes so that the values land in the
;; even words.
2749 (define_expand "vsignede_v2df"
2750   [(set (match_operand:V4SI 0 "register_operand" "=v")
2751         (match_operand:V2DF 1 "register_operand" "v"))]
2752   "TARGET_VSX"
2753 {
2754   rtx dest = operands[0];
2755   rtx src = operands[1];
2756   if (VECTOR_ELT_ORDER_BIG)
2757     {
2758       /* Big endian: the conversion already leaves its results in the
2759          words where they need to be, so no fix-up is required.  */
2760       emit_insn (gen_vsx_xvcvdpsxws (dest, src));
2761     }
2762   else
2763     {
2764       /* Little endian word numbering for the operand is 3 2 1 0.
2765          Convert into a scratch register, concatenate it with itself
2766          and shift left three words (12 bytes):
2767          0 1 2 3   0 1 2 3  =>  3 0 1 2
2768          Words 0 and 2 are now where they need to be for the result.  */
2769       rtx scratch = gen_reg_rtx (V4SImode);
2770       emit_insn (gen_vsx_xvcvdpsxws (scratch, src));
2771       emit_insn (gen_altivec_vsldoi_v4si (dest, scratch, scratch,
2772                                           GEN_INT (12)));
2773     }
2774   DONE;
2775 }
2776   [(set_attr "type" "veccomplex")])
2777
2778 ;; Generate unsigned2
2779 ;; Convert two V2DF vectors (operands 1 and 2) into one V4SI vector of
2780 ;; unsigned ints (operand 0).
;; The work is delegated to rs6000_generate_vsigned2_code with its
;; signed_convert flag cleared.
2781 (define_expand "vunsigned2_v2df"
2782 [(match_operand:V4SI 0 "register_operand" "=v")
2783  (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2784                (match_operand:V2DF 2 "register_operand" "v")]
2785               UNSPEC_VSX_VSIGNED2)]
2786  "TARGET_VSX"
2787 {
2788   /* The flag selects the unsigned flavour of the conversion;
2789      vsigned2_v2df is the same expander with the flag set.  */
2790   bool signed_convert = false;
2791   rtx dst = operands[0];
2792   rtx src_a = operands[1];
2793   rtx src_b = operands[2];
2794
2795   rs6000_generate_vsigned2_code (signed_convert, dst, src_a, src_b);
2796   DONE;
2797 })
2798
2799 ;; Generate vunsignedo_v2df
2800 ;; Unsigned double float to int convert, odd words.
;; Uses xvcvdpuxws; with big-endian element order the converted vector is
;; additionally shifted by 12 bytes so that the values land in the odd
;; words.
2801 (define_expand "vunsignedo_v2df"
2802   [(set (match_operand:V4SI 0 "register_operand" "=v")
2803         (match_operand:V2DF 1 "register_operand" "v"))]
2804   "TARGET_VSX"
2805 {
2806   if (!VECTOR_ELT_ORDER_BIG)
2807     {
2808       /* Little endian word numbering for the operand is 3 2 1 0.
2809          Result words 3 and 1 are already where they need to be.  */
2810       emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2811     }
2812   else
2813     {
2814       rtx converted = gen_reg_rtx (V4SImode);
2815
2816       emit_insn (gen_vsx_xvcvdpuxws (converted, operands[1]));
2817
2818       /* Big endian word numbering for the words is 0 1 2 3.
2819          Concatenate the converted vector with itself and shift left
2820          by 12 bytes (three words):
2821          0 1 2 3    0 1 2 3  =>  3 0 1 2
2822          Words 1 and 3 are now where they need to be for the result.  */
2823       emit_insn (gen_altivec_vsldoi_v4si (operands[0], converted,
2824                                           converted, GEN_INT (12)));
2825     }
2826
2827   DONE;
2828 }
2829   [(set_attr "type" "veccomplex")])
2830
2831 ;; Generate vunsignede_v2df
2832 ;; Unsigned double float to int convert, even words.
;; Uses xvcvdpuxws; with little-endian element order the converted vector
;; is additionally shifted by 12 bytes so that the values land in the
;; even words.
2833 (define_expand "vunsignede_v2df"
2834   [(set (match_operand:V4SI 0 "register_operand" "=v")
2835         (match_operand:V2DF 1 "register_operand" "v"))]
2836   "TARGET_VSX"
2837 {
2838   rtx dest = operands[0];
2839   rtx src = operands[1];
2840   if (VECTOR_ELT_ORDER_BIG)
2841     {
2842       /* Big endian: the conversion already leaves its results in the
2843          words where they need to be, so no fix-up is required.  */
2844       emit_insn (gen_vsx_xvcvdpuxws (dest, src));
2845     }
2846   else
2847     {
2848       /* Little endian word numbering for the operand is 3 2 1 0.
2849          Convert into a scratch register, concatenate it with itself
2850          and shift left three words (12 bytes):
2851          0 1 2 3   0 1 2 3  =>  3 0 1 2
2852          Words 0 and 2 are now where they need to be for the result.  */
2853       rtx scratch = gen_reg_rtx (V4SImode);
2854       emit_insn (gen_vsx_xvcvdpuxws (scratch, src));
2855       emit_insn (gen_altivec_vsldoi_v4si (dest, scratch, scratch,
2856                                           GEN_INT (12)));
2857     }
2858   DONE;
2859 }
2860   [(set_attr "type" "veccomplex")])
2861
2862 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2863 ;; the xvrdpiz instruction does not truncate the value if the floating
2864 ;; point value is < LONG_MIN or > LONG_MAX.
;; The pattern matches a V2DF -> V2DI fix immediately converted back to
;; V2DF and replaces the pair by a single xvrdpiz.  Besides VSX support for
;; V2DFmode it requires flag_unsafe_math_optimizations, !flag_trapping_math
;; and TARGET_FRIZ (see the condition below).
2865 (define_insn "*vsx_float_fix_v2df2"
2866   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2867         (float:V2DF
2868          (fix:V2DI
2869           (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2870   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
2871    && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2872    && !flag_trapping_math && TARGET_FRIZ"
2873   "xvrdpiz %x0,%x1"
2874   [(set_attr "type" "vecdouble")
2875    (set_attr "fp_type" "fp_addsub_d")])
2876
2877 \f
2878 ;; Permute operations
2879
2880 ;; Build a V2DF/V2DI vector from two scalars
;; Alternative 0 takes both scalars from VSX registers and combines them
;; with xxpermdi; alternative 1 takes them from "b" registers, requires the
;; "we" destination class and uses mtvsrdd.  In both cases the two source
;; operands trade places when the target is little endian.
2881 (define_insn "vsx_concat_<mode>"
2882   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2883         (vec_concat:VSX_D
2884          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2885          (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2886   "VECTOR_MEM_VSX_P (<MODE>mode)"
2887 {
2888   /* One output template per constraint alternative.  */
2889   switch (which_alternative)
2890     {
2891     case 0:
2892       return (BYTES_BIG_ENDIAN ? "xxpermdi %x0,%x1,%x2,0"
2893                                : "xxpermdi %x0,%x2,%x1,0");
2894     case 1:
2895       return (BYTES_BIG_ENDIAN ? "mtvsrdd %x0,%1,%2"
2896                                : "mtvsrdd %x0,%2,%1");
2897     default:
2898       gcc_unreachable ();
2899     }
2900 }
2901   [(set_attr "type" "vecperm")])
2902
2903 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2904 ;; word element in a vector register.
;; Variant 1: the first half of the result is element <operand 2> selected
;; from vector operand 1, the second half is scalar operand 3.
2905 (define_insn "*vsx_concat_<mode>_1"
2906   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2907         (vec_concat:VSX_D
2908          (vec_select:<VS_scalar>
2909           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2910           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2911          (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2912   "VECTOR_MEM_VSX_P (<MODE>mode)"
2913 {
2914   /* operands[4] is not part of the pattern; it is filled in here with
2915      the xxpermdi selector that the returned template prints as %4.  */
2916   HOST_WIDE_INT elt = INTVAL (operands[2]);
2917
2918   if (!BYTES_BIG_ENDIAN)
2919     {
2920       operands[4] = GEN_INT (!elt);
2921       return "xxpermdi %x0,%x3,%x1,%4";
2922     }
2923   operands[4] = GEN_INT (2*elt);
2924   return "xxpermdi %x0,%x1,%x3,%4";
2925 }
2926   [(set_attr "type" "vecperm")])
2927
;; Variant 2 of the combiner patterns: the first half of the result is
;; scalar operand 1, the second half is element <operand 3> selected from
;; vector operand 2.
2928 (define_insn "*vsx_concat_<mode>_2"
2929   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2930         (vec_concat:VSX_D
2931          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2932          (vec_select:<VS_scalar>
2933           (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2934           (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2935   "VECTOR_MEM_VSX_P (<MODE>mode)"
2936 {
2937   /* operands[4] is not part of the pattern; it is filled in here with
2938      the xxpermdi selector that the returned template prints as %4.  */
2939   HOST_WIDE_INT elt = INTVAL (operands[3]);
2940
2941   if (!BYTES_BIG_ENDIAN)
2942     {
2943       operands[4] = GEN_INT (2 * !elt);
2944       return "xxpermdi %x0,%x2,%x1,%4";
2945     }
2946   operands[4] = GEN_INT (elt);
2947   return "xxpermdi %x0,%x1,%x2,%4";
2948 }
2949   [(set_attr "type" "vecperm")])
2950
;; Variant 3 of the combiner patterns: both halves of the result are
;; elements selected from vectors (element <operand 2> of operand 1 and
;; element <operand 4> of operand 3).
2951 (define_insn "*vsx_concat_<mode>_3"
2952   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2953         (vec_concat:VSX_D
2954          (vec_select:<VS_scalar>
2955           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2956           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2957          (vec_select:<VS_scalar>
2958           (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2959           (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2960   "VECTOR_MEM_VSX_P (<MODE>mode)"
2961 {
2962   HOST_WIDE_INT first = INTVAL (operands[2]);
2963   HOST_WIDE_INT second = INTVAL (operands[4]);
2964
2965   /* operands[5] receives the selector that is printed as %5.  */
2966   if (!BYTES_BIG_ENDIAN)
2967     {
2968       /* Sources are swapped and both element numbers inverted.  */
2969       operands[5] = GEN_INT ((2 * !second) + !first);
2970       return "xxpermdi %x0,%x3,%x1,%5";
2971     }
2972   operands[5] = GEN_INT ((2 * first) + second);
2973   return "xxpermdi %x0,%x1,%x3,%5";
2974 }
2975   [(set_attr "type" "vecperm")])
2976
2977 ;; Special purpose concat using xxpermdi to glue two single precision values
2978 ;; together, relying on the fact that internally scalar floats are represented
2979 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
;; The result is typed V2DF and the operation is kept opaque as
;; UNSPEC_VSX_CONCAT.
2980 (define_insn "vsx_concat_v2sf"
2981   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2982         (unspec:V2DF
2983          [(match_operand:SF 1 "vsx_register_operand" "ww")
2984           (match_operand:SF 2 "vsx_register_operand" "ww")]
2985          UNSPEC_VSX_CONCAT))]
2986   "VECTOR_MEM_VSX_P (V2DFmode)"
2987 {
2988   /* The two sources trade places on little endian.  */
2989   return (BYTES_BIG_ENDIAN
2990           ? "xxpermdi %x0,%x1,%x2,0"
2991           : "xxpermdi %x0,%x2,%x1,0");
2992 }
2993   [(set_attr "type" "vecperm")])
2994
2995 ;; V4SImode initialization splitter
;; The insn takes four SI values (registers or constant integers) and two
;; DI scratch registers, all in general purpose registers; the destination
;; and both scratches are earlyclobber ("&").  It has no assembler output
;; of its own ("#"): after reload it is always split, and the replacement
;; sequence is produced by rs6000_split_v4si_init.
2996 (define_insn_and_split "vsx_init_v4si"
2997   [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2998         (unspec:V4SI
2999          [(match_operand:SI 1 "reg_or_cint_operand" "rn")
3000           (match_operand:SI 2 "reg_or_cint_operand" "rn")
3001           (match_operand:SI 3 "reg_or_cint_operand" "rn")
3002           (match_operand:SI 4 "reg_or_cint_operand" "rn")]
3003          UNSPEC_VSX_VEC_INIT))
3004    (clobber (match_scratch:DI 5 "=&r"))
3005    (clobber (match_scratch:DI 6 "=&r"))]
3006    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3007    "#"
3008    "&& reload_completed"
3009    [(const_int 0)]
3010 {
3011   rs6000_split_v4si_init (operands);
3012   DONE;
3013 })
3014
3015 ;; xxpermdi for little endian loads and stores.  We need several of
3016 ;; these since the form of the PARALLEL differs by mode.
;; Two-element (VSX_D) form: the register-to-register swap of the two
;; doublewords, written as a vec_select with elements (1 0) and emitted as
;; xxpermdi with both sources equal.  Little endian only.
3017 (define_insn "*vsx_xxpermdi2_le_<mode>"
3018   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3019         (vec_select:VSX_D
3020           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3021           (parallel [(const_int 1) (const_int 0)])))]
3022   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3023   "xxpermdi %x0,%x1,%x1,2"
3024   [(set_attr "type" "vecperm")])
3025
;; Four-element (VSX_W) form of the doubleword swap: the same xxpermdi,
;; with the PARALLEL spelled in word elements (2 3 0 1).
3026 (define_insn "*vsx_xxpermdi4_le_<mode>"
3027   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3028         (vec_select:VSX_W
3029           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3030           (parallel [(const_int 2) (const_int 3)
3031                      (const_int 0) (const_int 1)])))]
3032   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3033   "xxpermdi %x0,%x1,%x1,2"
3034   [(set_attr "type" "vecperm")])
3035
;; V8HI form of the doubleword swap: the same xxpermdi, with the PARALLEL
;; spelled in halfword elements (4..7 followed by 0..3).
3036 (define_insn "*vsx_xxpermdi8_le_V8HI"
3037   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3038         (vec_select:V8HI
3039           (match_operand:V8HI 1 "vsx_register_operand" "wa")
3040           (parallel [(const_int 4) (const_int 5)
3041                      (const_int 6) (const_int 7)
3042                      (const_int 0) (const_int 1)
3043                      (const_int 2) (const_int 3)])))]
3044   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
3045   "xxpermdi %x0,%x1,%x1,2"
3046   [(set_attr "type" "vecperm")])
3047
;; V16QI form of the doubleword swap: the same xxpermdi, with the PARALLEL
;; spelled in byte elements (8..15 followed by 0..7).
3048 (define_insn "*vsx_xxpermdi16_le_V16QI"
3049   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3050         (vec_select:V16QI
3051           (match_operand:V16QI 1 "vsx_register_operand" "wa")
3052           (parallel [(const_int 8) (const_int 9)
3053                      (const_int 10) (const_int 11)
3054                      (const_int 12) (const_int 13)
3055                      (const_int 14) (const_int 15)
3056                      (const_int 0) (const_int 1)
3057                      (const_int 2) (const_int 3)
3058                      (const_int 4) (const_int 5)
3059                      (const_int 6) (const_int 7)])))]
3060   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3061   "xxpermdi %x0,%x1,%x1,2"
3062   [(set_attr "type" "vecperm")])
3063
3064 ;; lxvd2x for little endian loads.  We need several of
3065 ;; these since the form of the PARALLEL differs by mode.
;; Two-element (VSX_D) form: the RTL describes the loaded value as the
;; memory operand with its two doublewords swapped (elements 1 0).
;; Little endian only, and not used when TARGET_P9_VECTOR is set.
3066 (define_insn "*vsx_lxvd2x2_le_<mode>"
3067   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3068         (vec_select:VSX_D
3069           (match_operand:VSX_D 1 "memory_operand" "Z")
3070           (parallel [(const_int 1) (const_int 0)])))]
3071   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3072   "lxvd2x %x0,%y1"
3073   [(set_attr "type" "vecload")])
3074
;; Four-element (VSX_W) form of the doubleword-swapping lxvd2x load, with
;; the PARALLEL spelled in word elements (2 3 0 1).
3075 (define_insn "*vsx_lxvd2x4_le_<mode>"
3076   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3077         (vec_select:VSX_W
3078           (match_operand:VSX_W 1 "memory_operand" "Z")
3079           (parallel [(const_int 2) (const_int 3)
3080                      (const_int 0) (const_int 1)])))]
3081   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3082   "lxvd2x %x0,%y1"
3083   [(set_attr "type" "vecload")])
3084
;; V8HI form of the little-endian lxvd2x load.  The selector 4..7,0..3
;; exchanges the two groups of four elements of the memory operand.
;; Disabled when TARGET_P9_VECTOR is set.
3085 (define_insn "*vsx_lxvd2x8_le_V8HI"
3086   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3087         (vec_select:V8HI
3088           (match_operand:V8HI 1 "memory_operand" "Z")
3089           (parallel [(const_int 4) (const_int 5)
3090                      (const_int 6) (const_int 7)
3091                      (const_int 0) (const_int 1)
3092                      (const_int 2) (const_int 3)])))]
3093   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3094   "lxvd2x %x0,%y1"
3095   [(set_attr "type" "vecload")])
3096
;; V16QI form of the little-endian lxvd2x load.  The selector 8..15,0..7
;; exchanges the two groups of eight elements of the memory operand.
;; Disabled when TARGET_P9_VECTOR is set.
3097 (define_insn "*vsx_lxvd2x16_le_V16QI"
3098   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3099         (vec_select:V16QI
3100           (match_operand:V16QI 1 "memory_operand" "Z")
3101           (parallel [(const_int 8) (const_int 9)
3102                      (const_int 10) (const_int 11)
3103                      (const_int 12) (const_int 13)
3104                      (const_int 14) (const_int 15)
3105                      (const_int 0) (const_int 1)
3106                      (const_int 2) (const_int 3)
3107                      (const_int 4) (const_int 5)
3108                      (const_int 6) (const_int 7)])))]
3109   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3110   "lxvd2x %x0,%y1"
3111   [(set_attr "type" "vecload")])
3112
3113 ;; stxvd2x for little endian stores.  We need several of
3114 ;; these since the form of the PARALLEL differs by mode.
;; This variant covers the two-element VSX_D modes (V2DF and V2DI): the store
;; is described as writing to memory a vec_select of the register operand
;; with its two elements exchanged.  Disabled when TARGET_P9_VECTOR is set.
3115 (define_insn "*vsx_stxvd2x2_le_<mode>"
3116   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3117         (vec_select:VSX_D
3118           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3119           (parallel [(const_int 1) (const_int 0)])))]
3120   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3121   "stxvd2x %x1,%y0"
3122   [(set_attr "type" "vecstore")])
3123
;; Four-element (VSX_W: V4SF and V4SI) form of the little-endian stxvd2x
;; store.  The selector 2,3,0,1 exchanges the two pairs of elements of the
;; register operand.  Disabled when TARGET_P9_VECTOR is set.
3124 (define_insn "*vsx_stxvd2x4_le_<mode>"
3125   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3126         (vec_select:VSX_W
3127           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3128           (parallel [(const_int 2) (const_int 3)
3129                      (const_int 0) (const_int 1)])))]
3130   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3131   "stxvd2x %x1,%y0"
3132   [(set_attr "type" "vecstore")])
3133
;; V8HI form of the little-endian stxvd2x store.  The selector 4..7,0..3
;; exchanges the two groups of four elements of the register operand.
;; Disabled when TARGET_P9_VECTOR is set.
3134 (define_insn "*vsx_stxvd2x8_le_V8HI"
3135   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3136         (vec_select:V8HI
3137           (match_operand:V8HI 1 "vsx_register_operand" "wa")
3138           (parallel [(const_int 4) (const_int 5)
3139                      (const_int 6) (const_int 7)
3140                      (const_int 0) (const_int 1)
3141                      (const_int 2) (const_int 3)])))]
3142   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3143   "stxvd2x %x1,%y0"
3144   [(set_attr "type" "vecstore")])
3145
;; V16QI form of the little-endian stxvd2x store.  The selector 8..15,0..7
;; exchanges the two groups of eight elements of the register operand.
;; Disabled when TARGET_P9_VECTOR is set.
3146 (define_insn "*vsx_stxvd2x16_le_V16QI"
3147   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3148         (vec_select:V16QI
3149           (match_operand:V16QI 1 "vsx_register_operand" "wa")
3150           (parallel [(const_int 8) (const_int 9)
3151                      (const_int 10) (const_int 11)
3152                      (const_int 12) (const_int 13)
3153                      (const_int 14) (const_int 15)
3154                      (const_int 0) (const_int 1)
3155                      (const_int 2) (const_int 3)
3156                      (const_int 4) (const_int 5)
3157                      (const_int 6) (const_int 7)])))]
3158   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3159   "stxvd2x %x1,%y0"
3160   [(set_attr "type" "vecstore")])
3161
3162 ;; Convert a TImode value into V1TImode
;; Operand 0 is the V1TI result, operand 1 the incoming V1TI vector, operand 2
;; the TImode value to insert and operand 3 the element number, which must be
;; zero.  The vector has a single element, so the result is operand 2 viewed
;; as V1TImode; the old contents of operand 1 do not contribute.
3163 (define_expand "vsx_set_v1ti"
3164   [(match_operand:V1TI 0 "nonimmediate_operand")
3165    (match_operand:V1TI 1 "nonimmediate_operand")
3166    (match_operand:TI 2 "input_operand")
3167    (match_operand:QI 3 "u5bit_cint_operand")]
3168   "VECTOR_MEM_VSX_P (V1TImode)"
3169 {
3170   if (operands[3] != const0_rtx)
3171     gcc_unreachable ();
3172
3173   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3174   DONE;
3175 })
3176
3177 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
;; Operand 0 is the result vector, operand 1 the input vector, operand 2 the
;; new scalar value and operand 3 the element number (0 or 1).  The element
;; that is NOT being replaced is extracted from operand 1 into a fresh pseudo
;; and then concatenated with operand 2, with the new value placed first when
;; element 0 is being set and second when element 1 is being set.
3178 (define_expand "vsx_set_<mode>"
3179   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3180    (use (match_operand:VSX_D 1 "vsx_register_operand"))
3181    (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3182    (use (match_operand:QI 3 "const_0_to_1_operand"))]
3183   "VECTOR_MEM_VSX_P (<MODE>mode)"
3184 {
3185   rtx dest = operands[0];
3186   rtx vec_reg = operands[1];
3187   rtx value = operands[2];
3188   rtx ele = operands[3];
3189   rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3190
3191   if (ele == const0_rtx)
3192     {
3193       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3194       emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3195       DONE;
3196     }
3197   else if (ele == const1_rtx)
3198     {
3199       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3200       emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3201       DONE;
3202     }
3203   else
3204     gcc_unreachable ();
3205 })
3206
3207 ;; Extract a DF/DI element from V2DF/V2DI
3208 ;; Optimize cases where we can do a simple or direct move.
3209 ;; Or see if we can avoid doing the move at all
3210
3211 ;; There are some unresolved problems with reload that show up if an Altivec
3212 ;; register was picked.  Limit the scalar value to FPRs for now.
3213
;; The output code chooses the instruction from the hard registers that were
;; allocated.  For element VECTOR_ELEMENT_SCALAR_64BIT it emits only an
;; assembler comment when source and destination are the same register, and
;; otherwise mfvsrd (integer destination with direct move on a 64-bit
;; target), fmr (both registers are FPRs) or xxlor (other VSX destination).
;; For element VECTOR_ELEMENT_MFVSRLD_64BIT going to an integer register on
;; TARGET_P9_VECTOR it emits mfvsrld.  Every remaining extract into a VSX
;; register uses xxpermdi, whose DM field is element << 1, replaced by
;; 3 - that value when !BYTES_BIG_ENDIAN; operands[3] is overwritten to carry
;; that immediate.
;; NOTE(review): the "type" attribute gives mftgpr for the second alternative
;; (d destination, which reaches the VSX-register paths) and vecperm for the
;; fourth (wr destination, presumably the mfvsrld case) -- these two entries
;; look swapped; confirm before relying on them for scheduling.
3214 (define_insn "vsx_extract_<mode>"
3215   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
3216
3217         (vec_select:<VS_scalar>
3218          (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
3219
3220          (parallel
3221           [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
3222   "VECTOR_MEM_VSX_P (<MODE>mode)"
3223 {
3224   int element = INTVAL (operands[2]);
3225   int op0_regno = REGNO (operands[0]);
3226   int op1_regno = REGNO (operands[1]);
3227   int fldDM;
3228
3229   gcc_assert (IN_RANGE (element, 0, 1));
3230   gcc_assert (VSX_REGNO_P (op1_regno));
3231
3232   if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3233     {
3234       if (op0_regno == op1_regno)
3235         return ASM_COMMENT_START " vec_extract to same register";
3236
3237       else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3238                && TARGET_POWERPC64)
3239         return "mfvsrd %0,%x1";
3240
3241       else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3242         return "fmr %0,%1";
3243
3244       else if (VSX_REGNO_P (op0_regno))
3245         return "xxlor %x0,%x1,%x1";
3246
3247       else
3248         gcc_unreachable ();
3249     }
3250
3251   else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3252            && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3253     return "mfvsrld %0,%x1";
3254
3255   else if (VSX_REGNO_P (op0_regno))
3256     {
3257       fldDM = element << 1;
3258       if (!BYTES_BIG_ENDIAN)
3259         fldDM = 3 - fldDM;
3260       operands[3] = GEN_INT (fldDM);
3261       return "xxpermdi %x0,%x1,%x1,%3";
3262     }
3263
3264   else
3265     gcc_unreachable ();
3266 }
3267   [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
3268
3269 ;; Optimize extracting a single scalar element from memory.
;; The vector (operand 1) is in memory and operand 2 is the constant element
;; number.  After reload the insn is split into a plain scalar move from the
;; memory reference returned by rs6000_adjust_vec_address, which is handed the
;; earlyclobber base-register scratch (operand 3) for its address arithmetic.
3270 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3271   [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3272         (vec_select:<VSX_D:VS_scalar>
3273          (match_operand:VSX_D 1 "memory_operand" "m,m")
3274          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3275    (clobber (match_scratch:P 3 "=&b,&b"))]
3276   "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3277   "#"
3278   "&& reload_completed"
3279   [(set (match_dup 0) (match_dup 4))]
3280 {
3281   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3282                                            operands[3], <VSX_D:VS_scalar>mode);
3283 }
3284   [(set_attr "type" "fpload,load")
3285    (set_attr "length" "8")])
3286
3287 ;; Optimize storing a single scalar element that is already in the right
3288 ;; location to memory
;; Operand 2 must satisfy the vsx_scalar_64bit predicate, so only that one
;; element number is accepted.  The three alternatives pair a memory
;; constraint with a register constraint and use the stfd, stxsd%U0x and
;; stxsd output templates respectively.
3289 (define_insn "*vsx_extract_<mode>_store"
3290   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3291         (vec_select:<VS_scalar>
3292          (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3293          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3294   "VECTOR_MEM_VSX_P (<MODE>mode)"
3295   "@
3296    stfd%U0%X0 %1,%0
3297    stxsd%U0x %x1,%y0
3298    stxsd %1,%0"
3299   [(set_attr "type" "fpstore")
3300    (set_attr "length" "4")])
3301
3302 ;; Variable V2DI/V2DF extract shift
;; Emits vslo with the vector in operand 1 and a V2DI operand 2; the result
;; is modelled as UNSPEC_VSX_VSLO in the scalar mode of the vector.  All
;; three operands use the "v" constraint.  Requires TARGET_DIRECT_MOVE_64BIT.
3303 (define_insn "vsx_vslo_<mode>"
3304   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3305         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3306                              (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3307                             UNSPEC_VSX_VSLO))]
3308   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3309   "vslo %0,%1,%2"
3310   [(set_attr "type" "vecperm")])
3311
3312 ;; Variable V2DI/V2DF extract
;; The element number is a run-time value in a DImode register (operand 2).
;; The vector (operand 1) may be in a register (first alternative) or in
;; memory (the other two).  Operand 3 is a DImode scratch and operand 4 a
;; V2DI scratch that is only allocated for the register alternative.  After
;; reload the whole insn is replaced by whatever
;; rs6000_split_vec_extract_var emits.
3313 (define_insn_and_split "vsx_extract_<mode>_var"
3314   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3315         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3316                              (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3317                             UNSPEC_VSX_EXTRACT))
3318    (clobber (match_scratch:DI 3 "=r,&b,&b"))
3319    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3320   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3321   "#"
3322   "&& reload_completed"
3323   [(const_int 0)]
3324 {
3325   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3326                                 operands[3], operands[4]);
3327   DONE;
3328 })
3329
3330 ;; Extract a SF element from V4SF
;; The split condition is "&& 1", so the insn is split as soon as splitting
;; runs.  The element number is used as-is on big endian and as 3 - n on
;; little endian.  When the adjusted number is 0 the source vector is used
;; directly; otherwise vsx_xxsldwi_v4sf of operand 1 with itself by the
;; adjusted number is emitted into operand 3 (a fresh pseudo if operand 3 is
;; still a SCRATCH).  The result is then produced by vsx_xscvspdp_scalar2.
3331 (define_insn_and_split "vsx_extract_v4sf"
3332   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3333         (vec_select:SF
3334          (match_operand:V4SF 1 "vsx_register_operand" "wa")
3335          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3336    (clobber (match_scratch:V4SF 3 "=0"))]
3337   "VECTOR_UNIT_VSX_P (V4SFmode)"
3338   "#"
3339   "&& 1"
3340   [(const_int 0)]
3341 {
3342   rtx op0 = operands[0];
3343   rtx op1 = operands[1];
3344   rtx op2 = operands[2];
3345   rtx op3 = operands[3];
3346   rtx tmp;
3347   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3348
3349   if (ele == 0)
3350     tmp = op1;
3351   else
3352     {
3353       if (GET_CODE (op3) == SCRATCH)
3354         op3 = gen_reg_rtx (V4SFmode);
3355       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3356       tmp = op3;
3357     }
3358   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3359   DONE;
3360 }
3361   [(set_attr "length" "8")
3362    (set_attr "type" "fp")])
3363
;; Extract a constant-numbered SF element from a V4SF that is in memory.
;; After reload the insn becomes a plain SFmode move from the memory
;; reference returned by rs6000_adjust_vec_address, which is handed the
;; earlyclobber base-register scratch (operand 3).  The first three
;; alternatives are typed fpload and the last one ("?r") load.
3364 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3365   [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3366         (vec_select:SF
3367          (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3368          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3369    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3370   "VECTOR_MEM_VSX_P (V4SFmode)"
3371   "#"
3372   "&& reload_completed"
3373   [(set (match_dup 0) (match_dup 4))]
3374 {
3375   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3376                                            operands[3], SFmode);
3377 }
3378   [(set_attr "type" "fpload,fpload,fpload,load")
3379    (set_attr "length" "8")])
3380
3381 ;; Variable V4SF extract
;; The element number is a run-time value in a DImode register (operand 2).
;; The vector may be in a register (first alternative) or in memory (the
;; other two).  Operand 3 is a DImode scratch and operand 4 a V2DI scratch
;; that is only allocated for the register alternative.  After reload the
;; insn is replaced by whatever rs6000_split_vec_extract_var emits.
3382 (define_insn_and_split "vsx_extract_v4sf_var"
3383   [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3384         (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3385                     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3386                    UNSPEC_VSX_EXTRACT))
3387    (clobber (match_scratch:DI 3 "=r,&b,&b"))
3388    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3389   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3390   "#"
3391   "&& reload_completed"
3392   [(const_int 0)]
3393 {
3394   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3395                                 operands[3], operands[4]);
3396   DONE;
3397 })
3398
3399 ;; Expand the builtin form of xxpermdi to canonical rtl.
;; Operand 0 is the result, operands 1 and 2 the two input vectors and
;; operand 3 the constant mask.  Bit 1 of the mask becomes the first selector
;; (0 or 1, indexing operand 1) and bit 0 plus 2 becomes the second selector
;; (2 or 3, indexing operand 2).  V2DF uses vsx_xxpermdi2_v2df_1; every other
;; mode uses vsx_xxpermdi2_v2di_1, with the operands converted to V2DImode
;; via gen_lowpart when the mode is not already V2DI.
3400 (define_expand "vsx_xxpermdi_<mode>"
3401   [(match_operand:VSX_L 0 "vsx_register_operand")
3402    (match_operand:VSX_L 1 "vsx_register_operand")
3403    (match_operand:VSX_L 2 "vsx_register_operand")
3404    (match_operand:QI 3 "u5bit_cint_operand")]
3405   "VECTOR_MEM_VSX_P (<MODE>mode)"
3406 {
3407   rtx target = operands[0];
3408   rtx op0 = operands[1];
3409   rtx op1 = operands[2];
3410   int mask = INTVAL (operands[3]);
3411   rtx perm0 = GEN_INT ((mask >> 1) & 1);
3412   rtx perm1 = GEN_INT ((mask & 1) + 2);
3413   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3414
3415   if (<MODE>mode == V2DFmode)
3416     gen = gen_vsx_xxpermdi2_v2df_1;
3417   else
3418     {
3419       gen = gen_vsx_xxpermdi2_v2di_1;
3420       if (<MODE>mode != V2DImode)
3421         {
3422           target = gen_lowpart (V2DImode, target);
3423           op0 = gen_lowpart (V2DImode, op0);
3424           op1 = gen_lowpart (V2DImode, op1);
3425         }
3426     }
3427   emit_insn (gen (target, op0, op1, perm0, perm1));
3428   DONE;
3429 })
3430
3431 ;; Special version of xxpermdi that retains big-endian semantics.
;; Operands and mask decoding are the same as in vsx_xxpermdi_<mode>.  On big
;; endian the generator is called unchanged.  On little endian the two input
;; vectors are passed in the opposite order and the selectors become
;; 3 - perm1 and 3 - perm0, which pre-compensates for the adjustment that
;; vsx_xxpermdi2_<mode>_1 applies when it prints the instruction.
3432 (define_expand "vsx_xxpermdi_<mode>_be"
3433   [(match_operand:VSX_L 0 "vsx_register_operand")
3434    (match_operand:VSX_L 1 "vsx_register_operand")
3435    (match_operand:VSX_L 2 "vsx_register_operand")
3436    (match_operand:QI 3 "u5bit_cint_operand")]
3437   "VECTOR_MEM_VSX_P (<MODE>mode)"
3438 {
3439   rtx target = operands[0];
3440   rtx op0 = operands[1];
3441   rtx op1 = operands[2];
3442   int mask = INTVAL (operands[3]);
3443   rtx perm0 = GEN_INT ((mask >> 1) & 1);
3444   rtx perm1 = GEN_INT ((mask & 1) + 2);
3445   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3446
3447   if (<MODE>mode == V2DFmode)
3448     gen = gen_vsx_xxpermdi2_v2df_1;
3449   else
3450     {
3451       gen = gen_vsx_xxpermdi2_v2di_1;
3452       if (<MODE>mode != V2DImode)
3453         {
3454           target = gen_lowpart (V2DImode, target);
3455           op0 = gen_lowpart (V2DImode, op0);
3456           op1 = gen_lowpart (V2DImode, op1);
3457         }
3458     }
3459   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3460      transformation we don't want; it is necessary for
3461      rs6000_expand_vec_perm_const_1 but not for this use.  So we
3462      prepare for that by reversing the transformation here.  */
3463   if (BYTES_BIG_ENDIAN)
3464     emit_insn (gen (target, op0, op1, perm0, perm1));
3465   else
3466     {
3467       rtx p0 = GEN_INT (3 - INTVAL (perm1));
3468       rtx p1 = GEN_INT (3 - INTVAL (perm0));
3469       emit_insn (gen (target, op1, op0, p0, p1));
3470     }
3471   DONE;
3472 })
3473
;; Canonical xxpermdi insn for the two-element modes.  The result is a
;; vec_select of two elements from the vec_concat of operands 1 and 2:
;; operand 3 (0 or 1) indexes into operand 1 and operand 4 (2 or 3) indexes
;; into operand 2.  The output code turns the selectors into the immediate
;; (op3 << 1) | (op4 - 2), stores it in operands[3], and on little endian
;; both adjusts the selectors and prints the two source registers in the
;; opposite order, as explained in the comment inside the code.
3474 (define_insn "vsx_xxpermdi2_<mode>_1"
3475   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3476         (vec_select:VSX_D
3477           (vec_concat:<VS_double>
3478             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3479             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3480           (parallel [(match_operand 3 "const_0_to_1_operand" "")
3481                      (match_operand 4 "const_2_to_3_operand" "")])))]
3482   "VECTOR_MEM_VSX_P (<MODE>mode)"
3483 {
3484   int op3, op4, mask;
3485
3486   /* For little endian, swap operands and invert/swap selectors
3487      to get the correct xxpermdi.  The operand swap sets up the
3488      inputs as a little endian array.  The selectors are swapped
3489      because they are defined to use big endian ordering.  The
3490      selectors are inverted to get the correct doublewords for
3491      little endian ordering.  */
3492   if (BYTES_BIG_ENDIAN)
3493     {
3494       op3 = INTVAL (operands[3]);
3495       op4 = INTVAL (operands[4]);
3496     }
3497   else
3498     {
3499       op3 = 3 - INTVAL (operands[4]);
3500       op4 = 3 - INTVAL (operands[3]);
3501     }
3502
3503   mask = (op3 << 1) | (op4 - 2);
3504   operands[3] = GEN_INT (mask);
3505
3506   if (BYTES_BIG_ENDIAN)
3507     return "xxpermdi %x0,%x1,%x2,%3";
3508   else
3509     return "xxpermdi %x0,%x2,%x1,%3";
3510 }
3511   [(set_attr "type" "vecperm")])
3512
3513 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
3514 ;; none of the small types were allowed in a vector register, so we had to
3515 ;; extract to a DImode and either do a direct move or store.
;; With TARGET_P9_VECTOR the expander emits vsx_extract_<mode>_p9 and stops.
;; Otherwise the preparation code falls through without DONE, so the RTL
;; template (the vec_select set plus a vector-mode scratch clobber) is what
;; gets emitted.
3516 (define_expand  "vsx_extract_<mode>"
3517   [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3518                    (vec_select:<VS_scalar>
3519                     (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3520                     (parallel [(match_operand:QI 2 "const_int_operand")])))
3521               (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3522   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3523 {
3524   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
3525   if (TARGET_P9_VECTOR)
3526     {
3527       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3528                                             operands[2]));
3529       DONE;
3530     }
3531 })
3532
;; Constant-element extract from a small-integer vector, enabled by
;; TARGET_VEXTRACTUB.  The first alternative ("=r" destination, with an SI
;; scratch) prints "#" and so has to be split.  The second alternative
;; rewrites operand 2 to the byte offset of the element (the element number
;; is reversed when !VECTOR_ELT_ORDER_BIG, then multiplied by the unit size)
;; and emits xxextractuw for 4-byte elements or vextractu<wd> otherwise.
3533 (define_insn "vsx_extract_<mode>_p9"
3534   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3535         (vec_select:<VS_scalar>
3536          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3537          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3538    (clobber (match_scratch:SI 3 "=r,X"))]
3539   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3540 {
3541   if (which_alternative == 0)
3542     return "#";
3543
3544   else
3545     {
3546       HOST_WIDE_INT elt = INTVAL (operands[2]);
3547       HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
3548                                ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3549                                : elt);
3550
3551       HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3552       HOST_WIDE_INT offset = unit_size * elt_adj;
3553
3554       operands[2] = GEN_INT (offset);
3555       if (unit_size == 4)
3556         return "xxextractuw %x0,%x1,%2";
3557       else
3558         return "vextractu<wd> %0,%1,%2";
3559     }
3560 }
3561   [(set_attr "type" "vecsimple")])
3562
;; Post-reload split of a constant-element extract whose destination is an
;; integer register and whose source is an Altivec register.  The byte
;; offset of the element (element number times unit size, with no endian
;; reversal here) is loaded into the SI clobber register, and then
;; vextu<wd>lx is emitted when VECTOR_ELT_ORDER_BIG and vextu<wd>rx otherwise.
;; The result is written to the destination register viewed in SImode.
3563 (define_split
3564   [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3565         (vec_select:<VS_scalar>
3566          (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3567          (parallel [(match_operand:QI 2 "const_int_operand")])))
3568    (clobber (match_operand:SI 3 "int_reg_operand"))]
3569   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3570   [(const_int 0)]
3571 {
3572   rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3573   rtx op1 = operands[1];
3574   rtx op2 = operands[2];
3575   rtx op3 = operands[3];
3576   HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3577
3578   emit_move_insn (op3, GEN_INT (offset));
3579   if (VECTOR_ELT_ORDER_BIG)
3580     emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3581   else
3582     emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3583   DONE;
3584 })
3585
3586 ;; Optimize zero extracts to eliminate the AND after the extract.
;; Matches a zero_extend to DImode of a constant-element extract.  After
;; reload it is rewritten as the plain extract (with the same SI clobber)
;; into operand 4, which is the destination hard register re-expressed in
;; the element's scalar mode; no separate extension insn is generated.
3587 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3588   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3589         (zero_extend:DI
3590          (vec_select:<VS_scalar>
3591           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3592           (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3593    (clobber (match_scratch:SI 3 "=r,X"))]
3594   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3595   "#"
3596   "&& reload_completed"
3597   [(parallel [(set (match_dup 4)
3598                    (vec_select:<VS_scalar>
3599                     (match_dup 1)
3600                     (parallel [(match_dup 2)])))
3601               (clobber (match_dup 3))])]
3602 {
3603   operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3604 })
3605
3606 ;; Optimize stores to use the ISA 3.0 scalar store instructions
;; Matches a constant-element extract whose destination is memory.  After
;; reload it becomes two insns: the extract into scratch operand 3 (with
;; operand 4 as the extract's clobber), followed by a scalar-mode store of
;; operand 3 to the memory destination.  The second alternative allocates
;; both scratches with the "&r" constraint; the first needs no SI scratch.
3607 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3608   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3609         (vec_select:<VS_scalar>
3610          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3611          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3612    (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3613    (clobber (match_scratch:SI 4 "=X,&r"))]
3614   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3615   "#"
3616   "&& reload_completed"
3617   [(parallel [(set (match_dup 3)
3618                    (vec_select:<VS_scalar>
3619                     (match_dup 1)
3620                     (parallel [(match_dup 2)])))
3621               (clobber (match_dup 4))])
3622    (set (match_dup 0)
3623         (match_dup 3))])
3624
;; Constant-element extract of an SI from V4SI when TARGET_P9_VECTOR is not
;; available.  After reload the element number is reversed if
;; !VECTOR_ELT_ORDER_BIG; unless the adjusted number is 1, a
;; vspltw_direct into the V4SI scratch is emitted first.  The value is then
;; delivered according to the destination: for memory, an SImode move when
;; TARGET_P8_VECTOR or stfiwx otherwise; for a register, an SImode move when
;; TARGET_P8_VECTOR or a DImode register-to-register move otherwise.
3625 (define_insn_and_split  "*vsx_extract_si"
3626   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3627         (vec_select:SI
3628          (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3629          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3630    (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3631   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3632   "#"
3633   "&& reload_completed"
3634   [(const_int 0)]
3635 {
3636   rtx dest = operands[0];
3637   rtx src = operands[1];
3638   rtx element = operands[2];
3639   rtx vec_tmp = operands[3];
3640   int value;
3641
3642   if (!VECTOR_ELT_ORDER_BIG)
3643     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3644
3645   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3646      instruction.  */
3647   value = INTVAL (element);
3648   if (value != 1)
3649     emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3650   else
3651     vec_tmp = src;
3652
3653   if (MEM_P (operands[0]))
3654     {
3655       if (can_create_pseudo_p ())
3656         dest = rs6000_address_for_fpconvert (dest);
3657
3658       if (TARGET_P8_VECTOR)
3659         emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3660       else
3661         emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3662     }
3663
3664   else if (TARGET_P8_VECTOR)
3665     emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3666   else
3667     emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3668                     gen_rtx_REG (DImode, REGNO (vec_tmp)));
3669
3670   DONE;
3671 }
3672   [(set_attr "type" "mftgpr,vecperm,fpstore")
3673    (set_attr "length" "8")])
3674
;; Constant-element extract from the VSX_EXTRACT_I2 modes into a general
;; register when TARGET_P9_VECTOR is not available.  After reload the
;; element number is reversed if !VECTOR_ELT_ORDER_BIG.  For V16QI a
;; vspltb_direct is emitted unless the adjusted number is 7; for V8HI a
;; vsplth_direct unless it is 3; any other mode is unreachable.  The value is
;; then transferred with a DImode move between the two hard registers.
3675 (define_insn_and_split  "*vsx_extract_<mode>_p8"
3676   [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3677         (vec_select:<VS_scalar>
3678          (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3679          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3680    (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3681   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3682    && !TARGET_P9_VECTOR"
3683   "#"
3684   "&& reload_completed"
3685   [(const_int 0)]
3686 {
3687   rtx dest = operands[0];
3688   rtx src = operands[1];
3689   rtx element = operands[2];
3690   rtx vec_tmp = operands[3];
3691   int value;
3692
3693   if (!VECTOR_ELT_ORDER_BIG)
3694     element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3695
3696   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3697      instruction.  */
3698   value = INTVAL (element);
3699   if (<MODE>mode == V16QImode)
3700     {
3701       if (value != 7)
3702         emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3703       else
3704         vec_tmp = src;
3705     }
3706   else if (<MODE>mode == V8HImode)
3707     {
3708       if (value != 3)
3709         emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3710       else
3711         vec_tmp = src;
3712     }
3713   else
3714     gcc_unreachable ();
3715
3716   emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3717                   gen_rtx_REG (DImode, REGNO (vec_tmp)));
3718   DONE;
3719 }
3720   [(set_attr "type" "mftgpr")])
3721
3722 ;; Optimize extracting a single scalar element from memory.
;; Small-integer-vector version: the vector is in memory and the destination
;; uses the "r" constraint.  After reload the insn becomes a plain
;; scalar-mode move from the memory reference returned by
;; rs6000_adjust_vec_address, which is handed the earlyclobber DImode
;; base-register scratch (operand 3).
3723 (define_insn_and_split "*vsx_extract_<mode>_load"
3724   [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3725         (vec_select:<VS_scalar>
3726          (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3727          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3728    (clobber (match_scratch:DI 3 "=&b"))]
3729   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3730   "#"
3731   "&& reload_completed"
3732   [(set (match_dup 0) (match_dup 4))]
3733 {
3734   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3735                                            operands[3], <VS_scalar>mode);
3736 }
3737   [(set_attr "type" "load")
3738    (set_attr "length" "8")])
3739
3740 ;; Variable V16QI/V8HI/V4SI extract
;; The element number is a run-time value in a DImode register (operand 2)
;; and the destination uses the "r" constraint in every alternative.  The
;; vector may be in a register (first two alternatives) or in memory (third).
;; Operand 3 is a DImode scratch and operand 4 a V2DI scratch that is only
;; allocated for the second alternative.  After reload the insn is replaced
;; by whatever rs6000_split_vec_extract_var emits.
3741 (define_insn_and_split "vsx_extract_<mode>_var"
3742   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3743         (unspec:<VS_scalar>
3744          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3745           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3746          UNSPEC_VSX_EXTRACT))
3747    (clobber (match_scratch:DI 3 "=r,r,&b"))
3748    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3749   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3750   "#"
3751   "&& reload_completed"
3752   [(const_int 0)]
3753 {
3754   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3755                                 operands[3], operands[4]);
3756   DONE;
3757 })
3758
;; Variable-element extract from a small-integer vector combined with a
;; zero_extend to an SDI-mode destination.  Operands and scratches are the
;; same as in vsx_extract_<mode>_var.  After reload the insn is replaced by
;; whatever rs6000_split_vec_extract_var emits; the destination handed to it
;; is the result hard register re-expressed in the element's scalar mode, so
;; no separate extension insn is generated here.
3759 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
3760   [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3761         (zero_extend:SDI
3762          (unspec:<VSX_EXTRACT_I:VS_scalar>
3763           [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3764            (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3765           UNSPEC_VSX_EXTRACT)))
3766    (clobber (match_scratch:DI 3 "=r,r,&b"))
3767    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3768   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3769   "#"
3770   "&& reload_completed"
3771   [(const_int 0)]
3772 {
3773   machine_mode smode = <VSX_EXTRACT_I:VS_scalar>mode;
3774   rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3775                                 operands[1], operands[2],
3776                                 operands[3], operands[4]);
3777   DONE;
3778 })
3779
3780 ;; VSX_EXTRACT optimizations
3781 ;; Optimize double d = (double) vec_extract (vi, <n>)
3782 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
;; The split condition is "&& 1".  The element number is reversed when
;; !VECTOR_ELT_ORDER_BIG; if the adjusted number is not 0 a vspltw_direct
;; into operand 3 (a fresh pseudo if it is still a SCRATCH) is emitted, and
;; otherwise the source vector is used directly.  The conversion itself is
;; emitted through gen_vsx_xvcv<su>xwdp_df.
3783 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3784   [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3785         (any_float:DF
3786          (vec_select:SI
3787           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3788           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3789    (clobber (match_scratch:V4SI 3 "=v"))]
3790   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3791   "#"
3792   "&& 1"
3793   [(const_int 0)]
3794 {
3795   rtx dest = operands[0];
3796   rtx src = operands[1];
3797   rtx element = operands[2];
3798   rtx v4si_tmp = operands[3];
3799   int value;
3800
3801   if (!VECTOR_ELT_ORDER_BIG)
3802     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3803
3804   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3805      instruction.  */
3806   value = INTVAL (element);
3807   if (value != 0)
3808     {
3809       if (GET_CODE (v4si_tmp) == SCRATCH)
3810         v4si_tmp = gen_reg_rtx (V4SImode);
3811       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3812     }
3813   else
3814     v4si_tmp = src;
3815
3816   emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3817   DONE;
3818 })
3819
3820 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3821 ;; where <type> is a floating point type supported by the hardware that is
3822 ;; not double.  First convert the value to double, and then to the desired
3823 ;; type.
;; The element is positioned exactly as in *vsx_extract_si_<uns>float_df and
;; converted into the DF temporary (operand 4, a fresh pseudo if it is still
;; a SCRATCH).  The final step depends on the destination mode: truncdfsf2
;; for SF, or one of the extenddf* expanders for TF, IF and KF under the
;; conditions tested in the code; any other combination is unreachable.
3824 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3825   [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3826         (any_float:VSX_EXTRACT_FL
3827          (vec_select:SI
3828           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3829           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3830    (clobber (match_scratch:V4SI 3 "=v"))
3831    (clobber (match_scratch:DF 4 "=ws"))]
3832   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3833   "#"
3834   "&& 1"
3835   [(const_int 0)]
3836 {
3837   rtx dest = operands[0];
3838   rtx src = operands[1];
3839   rtx element = operands[2];
3840   rtx v4si_tmp = operands[3];
3841   rtx df_tmp = operands[4];
3842   int value;
3843
3844   if (!VECTOR_ELT_ORDER_BIG)
3845     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3846
3847   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3848      instruction.  */
3849   value = INTVAL (element);
3850   if (value != 0)
3851     {
3852       if (GET_CODE (v4si_tmp) == SCRATCH)
3853         v4si_tmp = gen_reg_rtx (V4SImode);
3854       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3855     }
3856   else
3857     v4si_tmp = src;
3858
3859   if (GET_CODE (df_tmp) == SCRATCH)
3860     df_tmp = gen_reg_rtx (DFmode);
3861
3862   emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3863
3864   if (<MODE>mode == SFmode)
3865     emit_insn (gen_truncdfsf2 (dest, df_tmp));
3866   else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3867     emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3868   else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3869            && TARGET_FLOAT128_HW)
3870     emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3871   else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3872     emit_insn (gen_extenddfif2 (dest, df_tmp));
3873   else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3874     emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3875   else
3876     gcc_unreachable ();
3877
3878   DONE;
3879 })
3880
3881 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3882 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3883 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3884 ;; vector short or vector unsigned short.
;; Signed version, requiring TARGET_P9_VECTOR.  After reload it becomes three
;; insns: the element extract into scratch operand 3, a sign_extend of that
;; value into operand 4 (the same hard register viewed as DImode), and the
;; signed float conversion of operand 4 into the destination.
3885 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3886   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3887         (float:FL_CONV
3888          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3889           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3890           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3891    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3892   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3893    && TARGET_P9_VECTOR"
3894   "#"
3895   "&& reload_completed"
3896   [(parallel [(set (match_dup 3)
3897                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3898                     (match_dup 1)
3899                     (parallel [(match_dup 2)])))
3900               (clobber (scratch:SI))])
3901    (set (match_dup 4)
3902         (sign_extend:DI (match_dup 3)))
3903    (set (match_dup 0)
3904         (float:<FL_CONV:MODE> (match_dup 4)))]
3905 {
3906   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3907 })
3908
     ;; Unsigned counterpart of the extract-and-convert pattern: matches
     ;; UNSIGNED_FLOAT of an extracted vector element.  After reload it is split
     ;; into the element extraction into the scratch (operand 3) followed
     ;; directly by a signed FLOAT of the scratch register viewed in DImode
     ;; (operand 4); no explicit extension step is emitted.
     ;; NOTE(review): this presumably relies on the extraction leaving the
     ;; upper bits of the register zero, so the DImode value is non-negative
     ;; and the signed conversion is exact -- confirm against the extract insn.
3909 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3910   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3911         (unsigned_float:FL_CONV
3912          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3913           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3914           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3915    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3916   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3917    && TARGET_P9_VECTOR"
3918   "#"
3919   "&& reload_completed"
3920   [(parallel [(set (match_dup 3)
3921                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3922                     (match_dup 1)
3923                     (parallel [(match_dup 2)])))
3924               (clobber (scratch:SI))])
3925    (set (match_dup 0)
3926         (float:<FL_CONV:MODE> (match_dup 4)))]
3927 {
       /* Same hard register as the scratch, re-typed as DImode.  */
3928   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3929 })
3930
3931 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
     ;; Operand 1 is the vector being modified and is tied to the output.
     ;; Operand 2 is the scalar to insert and operand 3 the element number.
     ;; The element number is turned into the byte offset printed in the
     ;; instruction: it is mirrored when the vector element order is not big
     ;; endian and then scaled by the size of one element.
3932 (define_insn "vsx_set_<mode>_p9"
3933   [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3934         (unspec:VSX_EXTRACT_I
3935          [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3936           (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3937           (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3938          UNSPEC_VSX_SET))]
3939   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3940 {
3941   int ele = INTVAL (operands[3]);
3942   int nunits = GET_MODE_NUNITS (<MODE>mode);
3943
       /* Mirror the element number for non-big-endian element order.  */
3944   if (!VECTOR_ELT_ORDER_BIG)
3945     ele = nunits - 1 - ele;
3946
       /* Operand 3 is rewritten in place as a byte offset for printing.  */
3947   operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
       /* V4SI uses the VSX word insert; the other modes use vinsert<wd>.  */
3948   if (<MODE>mode == V4SImode)
3949     return "xxinsertw %x0,%x2,%3";
3950   else
3951     return "vinsert<wd> %0,%2,%3";
3952 }
3953   [(set_attr "type" "vecperm")])
3954
     ;; Insert the SF scalar operand 2 into element operand 3 of a V4SF vector
     ;; (operand 1, tied to the output).  After reload this is split into:
     ;;   1. UNSPEC_VSX_CVDPSPN of operand 2 into the scratch register, viewed
     ;;      as V4SF (operand 5);
     ;;   2. a vec_select of one word of that register, viewed as V4SI
     ;;      (operand 6), back into the SI scratch (operand 4);
     ;;   3. a V4SI UNSPEC_VSX_SET of that word into operand 0 viewed as V4SI
     ;;      (operand 8).
     ;; The length attribute of 12 covers the resulting sequence.
3955 (define_insn_and_split "vsx_set_v4sf_p9"
3956   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3957         (unspec:V4SF
3958          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3959           (match_operand:SF 2 "gpc_reg_operand" "ww")
3960           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3961          UNSPEC_VSX_SET))
3962    (clobber (match_scratch:SI 4 "=&wJwK"))]
3963   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3964   "#"
3965   "&& reload_completed"
3966   [(set (match_dup 5)
3967         (unspec:V4SF [(match_dup 2)]
3968                      UNSPEC_VSX_CVDPSPN))
3969    (parallel [(set (match_dup 4)
3970                    (vec_select:SI (match_dup 6)
3971                                   (parallel [(match_dup 7)])))
3972               (clobber (scratch:SI))])
3973    (set (match_dup 8)
3974         (unspec:V4SI [(match_dup 8)
3975                       (match_dup 4)
3976                       (match_dup 3)]
3977                      UNSPEC_VSX_SET))]
3978 {
3979   unsigned int tmp_regno = reg_or_subregno (operands[4]);
3980
       /* Operands 5 and 6 are the scratch register in V4SF and V4SI mode.  */
3981   operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3982   operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
       /* Word to extract: 1 for big-endian element order, 2 otherwise.  */
3983   operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
       /* Operand 8 is the destination register viewed as V4SI.  */
3984   operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3985 }
3986   [(set_attr "type" "vecperm")
3987    (set_attr "length" "12")])
3988
3989 ;; Special case setting 0.0f to a V4SF element
     ;; Operand 2 must be a floating point zero constant.  After reload the
     ;; insn is split into loading integer 0 into the SI scratch (operand 4)
     ;; and a V4SI UNSPEC_VSX_SET of that scratch into operand 0 viewed as
     ;; V4SI (operand 5), so no float conversion is needed.
3990 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3991   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3992         (unspec:V4SF
3993          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3994           (match_operand:SF 2 "zero_fp_constant" "j")
3995           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3996          UNSPEC_VSX_SET))
3997    (clobber (match_scratch:SI 4 "=&wJwK"))]
3998   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3999   "#"
4000   "&& reload_completed"
4001   [(set (match_dup 4)
4002         (const_int 0))
4003    (set (match_dup 5)
4004         (unspec:V4SI [(match_dup 5)
4005                       (match_dup 4)
4006                       (match_dup 3)]
4007                      UNSPEC_VSX_SET))]
4008 {
       /* Destination register viewed as V4SI.  */
4009   operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4010 }
4011   [(set_attr "type" "vecperm")
4012    (set_attr "length" "8")])
4013
4014 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4015 ;; that is in the default scalar position (1 for big endian, 2 for little
4016 ;; endian).  We just need to do an xxinsertw since the element is in the
4017 ;; correct location.
     ;; Operand 3 is n (checked in the insn condition) and operand 4 is m,
     ;; which is converted to the byte offset printed in the instruction.
4018
4019 (define_insn "*vsx_insert_extract_v4sf_p9"
4020   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4021         (unspec:V4SF
4022          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4023           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4024                          (parallel
4025                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4026           (match_operand:QI 4 "const_0_to_3_operand" "n")]
4027          UNSPEC_VSX_SET))]
4028   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4029    && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4030 {
4031   int ele = INTVAL (operands[4]);
4032
       /* Mirror the destination element for non-big-endian element order.  */
4033   if (!VECTOR_ELT_ORDER_BIG)
4034     ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4035
       /* Operand 4 is rewritten in place as a byte offset for printing.  */
4036   operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4037   return "xxinsertw %x0,%x2,%4";
4038 }
4039   [(set_attr "type" "vecperm")])
4040
4041 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4042 ;; that is in the default scalar position (1 for big endian, 2 for little
4043 ;; endian).  Convert the insert/extract to int and avoid doing the conversion.
     ;; The split condition is "&& 1", so the split may run before register
     ;; allocation; in that case the SI scratch is still a SCRATCH and a new
     ;; pseudo is allocated for it.  The result is a V4SI element extract into
     ;; the SI temporary followed by a V4SI UNSPEC_VSX_SET.
4044
4045 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4046   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4047         (unspec:V4SF
4048          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4049           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4050                          (parallel
4051                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4052           (match_operand:QI 4 "const_0_to_3_operand" "n")]
4053          UNSPEC_VSX_SET))
4054    (clobber (match_scratch:SI 5 "=&wJwK"))]
4055   "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4056    && TARGET_P9_VECTOR && TARGET_POWERPC64
4057    && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4058   "#"
4059   "&& 1"
4060   [(parallel [(set (match_dup 5)
4061                    (vec_select:SI (match_dup 6)
4062                                   (parallel [(match_dup 3)])))
4063               (clobber (scratch:SI))])
4064    (set (match_dup 7)
4065         (unspec:V4SI [(match_dup 8)
4066                       (match_dup 5)
4067                       (match_dup 4)]
4068                      UNSPEC_VSX_SET))]
4069 {
       /* Before register allocation the scratch has no register yet.  */
4070   if (GET_CODE (operands[5]) == SCRATCH)
4071     operands[5] = gen_reg_rtx (SImode);
4072
       /* V4SI views of the source (6), destination (7) and tied input (8).  */
4073   operands[6] = gen_lowpart (V4SImode, operands[2]);
4074   operands[7] = gen_lowpart (V4SImode, operands[0]);
4075   operands[8] = gen_lowpart (V4SImode, operands[1]);
4076 }
4077   [(set_attr "type" "vecperm")])
4078
4079 ;; Expanders for builtins
     ;; vsx_mergel_<mode>: build operand 0 as a VEC_SELECT of two elements out
     ;; of the VEC_CONCAT of operands 1 and 2.  Normally elements 1 and 3 of
     ;; (op1, op2) are selected; for little endian with big-endian element
     ;; order the inputs are swapped and elements 0 and 2 are selected instead.
4080 (define_expand "vsx_mergel_<mode>"
4081   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4082    (use (match_operand:VSX_D 1 "vsx_register_operand"))
4083    (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4084   "VECTOR_MEM_VSX_P (<MODE>mode)"
4085 {
4086   rtvec v;
4087   rtx x;
4088
4089   /* Special handling for LE with -maltivec=be.  */
4090   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4091     {
4092       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4093       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4094     }
4095   else
4096     {
4097       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4098       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4099     }
4100
       /* Emit the SET directly; the "use" template above is never emitted.  */
4101   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4102   emit_insn (gen_rtx_SET (operands[0], x));
4103   DONE;
4104 })
4105
     ;; vsx_mergeh_<mode>: build operand 0 as a VEC_SELECT of two elements out
     ;; of the VEC_CONCAT of operands 1 and 2.  Normally elements 0 and 2 of
     ;; (op1, op2) are selected; for little endian with big-endian element
     ;; order the inputs are swapped and elements 1 and 3 are selected instead.
4106 (define_expand "vsx_mergeh_<mode>"
4107   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4108    (use (match_operand:VSX_D 1 "vsx_register_operand"))
4109    (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4110   "VECTOR_MEM_VSX_P (<MODE>mode)"
4111 {
4112   rtvec v;
4113   rtx x;
4114
4115   /* Special handling for LE with -maltivec=be.  */
4116   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4117     {
4118       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4119       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4120     }
4121   else
4122     {
4123       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4124       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4125     }
4126
       /* Emit the SET directly; the "use" template above is never emitted.  */
4127   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4128   emit_insn (gen_rtx_SET (operands[0], x));
4129   DONE;
4130 })
4131
4132 ;; V2DF/V2DI splat
4133 ;; We separate the register splat insn from the memory splat insn to force the
4134 ;; register allocator to generate the indexed form of the SPLAT when it is
4135 ;; given an offsettable memory reference.  Otherwise, if the register and
4136 ;; memory insns were combined into a single insn, the register allocator will
4137 ;; load the value into a register, and then do a double word permute.
     ;;
     ;; The expander only legitimizes operand 1 and then lets the pattern be
     ;; emitted: a memory source is passed through rs6000_address_for_fpconvert,
     ;; and anything that is neither a MEM nor a REG is copied into a register.
4138 (define_expand "vsx_splat_<mode>"
4139   [(set (match_operand:VSX_D 0 "vsx_register_operand")
4140         (vec_duplicate:VSX_D
4141          (match_operand:<VS_scalar> 1 "input_operand")))]
4142   "VECTOR_MEM_VSX_P (<MODE>mode)"
4143 {
4144   rtx op1 = operands[1];
4145   if (MEM_P (op1))
4146     operands[1] = rs6000_address_for_fpconvert (op1);
4147   else if (!REG_P (op1))
       /* The forced register must be stored back into operands[1]: the
          emitted pattern is built from the operands array, so assigning only
          the local copy would leave the unforced operand in the insn.  */
4148     operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
4149 })
4150
     ;; Register form of the V2DF/V2DI splat.  Alternative 0 emits xxpermdi
     ;; with operand 1 as both sources; alternative 1 takes the scalar in a
     ;; "b" class register and emits mtvsrdd with it as both sources.
4151 (define_insn "vsx_splat_<mode>_reg"
4152   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4153         (vec_duplicate:VSX_D
4154          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4155   "VECTOR_MEM_VSX_P (<MODE>mode)"
4156   "@
4157    xxpermdi %x0,%x1,%x1,0
4158    mtvsrdd %x0,%1,%1"
4159   [(set_attr "type" "vecperm")])
4160
     ;; Memory form of the V2DF/V2DI splat: the scalar is loaded and
     ;; duplicated by a single lxvdsx from a "Z" memory operand.
4161 (define_insn "vsx_splat_<VSX_D:mode>_mem"
4162   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4163         (vec_duplicate:VSX_D
4164          (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4165   "VECTOR_MEM_VSX_P (<MODE>mode)"
4166   "lxvdsx %x0,%y1"
4167   [(set_attr "type" "vecload")])
4168
4169 ;; V4SI splat support
     ;; Requires TARGET_P9_VECTOR.  Alternative 0 splats from a general
     ;; register with mtvsrws; alternative 1 splats from memory with lxvwsx.
4170 (define_insn "vsx_splat_v4si"
4171   [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4172         (vec_duplicate:V4SI
4173          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4174   "TARGET_P9_VECTOR"
4175   "@
4176    mtvsrws %x0,%1
4177    lxvwsx %x0,%y1"
4178   [(set_attr "type" "vecperm,vecload")])
4179
4180 ;; SImode is not currently allowed in vector registers.  This pattern
4181 ;; allows us to use direct move to get the value in a vector register
4182 ;; so that we can use XXSPLTW
     ;; The source is a DImode register whose truncation to SImode is
     ;; duplicated.  Alternative 0 (source in a "wj" register) splats word 1
     ;; with xxspltw; alternative 1 (source in a GPR) uses mtvsrws.
4183 (define_insn "vsx_splat_v4si_di"
4184   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4185         (vec_duplicate:V4SI
4186          (truncate:SI
4187           (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4188   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4189   "@
4190    xxspltw %x0,%x1,1
4191    mtvsrws %x0,%1"
4192   [(set_attr "type" "vecperm")])
4193
4194 ;; V4SF splat (ISA 3.0)
     ;; Three alternatives:
     ;;   0. memory source: lxvwsx;
     ;;   1. "wy" register source: split after reload into UNSPEC_VSX_CVDPSPN
     ;;      of operand 1 into operand 0 followed by UNSPEC_VSX_XXSPLTW of
     ;;      word 0 of operand 0 (length 8);
     ;;   2. general register source: mtvsrws.
     ;; The split only applies when operand 1 is a VSX register.
4195 (define_insn_and_split "vsx_splat_v4sf"
4196   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4197         (vec_duplicate:V4SF
4198          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4199   "TARGET_P9_VECTOR"
4200   "@
4201    lxvwsx %x0,%y1
4202    #
4203    mtvsrws %x0,%1"
4204   "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4205   [(set (match_dup 0)
4206         (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4207    (set (match_dup 0)
4208         (unspec:V4SF [(match_dup 0)
4209                       (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4210   ""
4211   [(set_attr "type" "vecload,vecperm,mftgpr")
4212    (set_attr "length" "4,8,4")])
4213
4214 ;; V4SF/V4SI splat from a vector element
     ;; Operand 2 is the element number in RTL (vec_select) terms.  On little
     ;; endian it is rewritten as 3 - n before being printed as the word
     ;; number of xxspltw.
4215 (define_insn "vsx_xxspltw_<mode>"
4216   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4217         (vec_duplicate:VSX_W
4218          (vec_select:<VS_scalar>
4219           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4220           (parallel
4221            [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4222   "VECTOR_MEM_VSX_P (<MODE>mode)"
4223 {
       /* Mirror the element number within the four words on little endian.  */
4224   if (!BYTES_BIG_ENDIAN)
4225     operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4226
4227   return "xxspltw %x0,%x1,%2";
4228 }
4229   [(set_attr "type" "vecperm")])
4230
     ;; Direct form of the word splat, expressed as UNSPEC_VSX_XXSPLTW.
     ;; Operand 2 is printed unchanged as the word number of xxspltw; no
     ;; endian adjustment is made here.
4231 (define_insn "vsx_xxspltw_<mode>_direct"
4232   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4233         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4234                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
4235                       UNSPEC_VSX_XXSPLTW))]
4236   "VECTOR_MEM_VSX_P (<MODE>mode)"
4237   "xxspltw %x0,%x1,%2"
4238   [(set_attr "type" "vecperm")])
4239
4240 ;; V16QI/V8HI splat support on ISA 2.07
     ;; The scalar is the truncation of a DImode value held in an Altivec
     ;; register; it is duplicated with vsplt<VSX_SPLAT_SUFFIX> using the
     ;; fixed element number <VSX_SPLAT_COUNT>.
4241 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4242   [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4243         (vec_duplicate:VSX_SPLAT_I
4244          (truncate:<VS_scalar>
4245           (match_operand:DI 1 "altivec_register_operand" "v"))))]
4246   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4247   "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4248   [(set_attr "type" "vecperm")])
4249
4250 ;; V2DF/V2DI splat for use by vec_splat builtin
     ;; Operand 2 is the element to duplicate.  Element 0 in big-endian
     ;; element order, or element 1 otherwise, is emitted as xxpermdi with
     ;; selector 0; the remaining case uses selector 3.
4251 (define_insn "vsx_xxspltd_<mode>"
4252   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4253         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4254                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
4255                       UNSPEC_VSX_XXSPLTD))]
4256   "VECTOR_MEM_VSX_P (<MODE>mode)"
4257 {
4258   if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
4259       || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
4260     return "xxpermdi %x0,%x1,%x1,0";
4261   else
4262     return "xxpermdi %x0,%x1,%x1,3";
4263 }
4264   [(set_attr "type" "vecperm")])
4265
4266 ;; V4SF/V4SI interleave
     ;; Selects elements 0, 4, 1, 5 of the concatenation of operands 1 and 2.
     ;; On big endian this is xxmrghw; on little endian the same RTL is
     ;; emitted as xxmrglw with the two source operands swapped.
4267 (define_insn "vsx_xxmrghw_<mode>"
4268   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4269         (vec_select:VSX_W
4270           (vec_concat:<VS_double>
4271             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4272             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4273           (parallel [(const_int 0) (const_int 4)
4274                      (const_int 1) (const_int 5)])))]
4275   "VECTOR_MEM_VSX_P (<MODE>mode)"
4276 {
4277   if (BYTES_BIG_ENDIAN)
4278     return "xxmrghw %x0,%x1,%x2";
4279   else
4280     return "xxmrglw %x0,%x2,%x1";
4281 }
4282   [(set_attr "type" "vecperm")])
4283
     ;; Selects elements 2, 6, 3, 7 of the concatenation of operands 1 and 2.
     ;; On big endian this is xxmrglw; on little endian the same RTL is
     ;; emitted as xxmrghw with the two source operands swapped.
     ;; NOTE(review): operand 2 carries a "?" in its second alternative, unlike
     ;; the matching operand of vsx_xxmrghw_<mode> -- confirm it is intended.
4284 (define_insn "vsx_xxmrglw_<mode>"
4285   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4286         (vec_select:VSX_W
4287           (vec_concat:<VS_double>
4288             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4289             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4290           (parallel [(const_int 2) (const_int 6)
4291                      (const_int 3) (const_int 7)])))]
4292   "VECTOR_MEM_VSX_P (<MODE>mode)"
4293 {
4294   if (BYTES_BIG_ENDIAN)
4295     return "xxmrglw %x0,%x1,%x2";
4296   else
4297     return "xxmrghw %x0,%x2,%x1";
4298 }
4299   [(set_attr "type" "vecperm")])
4300
4301 ;; Shift left double by word immediate
     ;; Kept as UNSPEC_VSX_SLDWI; operands 1 and 2 are the two source vectors
     ;; and operand 3 is the immediate printed as the shift count.
4302 (define_insn "vsx_xxsldwi_<mode>"
4303   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4304         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4305                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4306                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
4307                       UNSPEC_VSX_SLDWI))]
4308   "VECTOR_MEM_VSX_P (<MODE>mode)"
4309   "xxsldwi %x0,%x1,%x2,%3"
4310   [(set_attr "type" "vecperm")])
4311
4312 \f
4313 ;; Vector reduction insns and splitters
     ;; V2DF reduction: combines operand 1 with a copy of itself whose two
     ;; elements are exchanged.  The split emits xxsldwi by 2 words of operand 1
     ;; with itself into a temporary, then the <VEC_reduc_rtx> operation of the
     ;; temporary and operand 1.  The split condition is empty, so a fresh
     ;; pseudo is used when the scratch has not been allocated yet.
4314
4315 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4316   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4317         (VEC_reduc:V2DF
4318          (vec_concat:V2DF
4319           (vec_select:DF
4320            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4321            (parallel [(const_int 1)]))
4322           (vec_select:DF
4323            (match_dup 1)
4324            (parallel [(const_int 0)])))
4325          (match_dup 1)))
4326    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4327   "VECTOR_UNIT_VSX_P (V2DFmode)"
4328   "#"
4329   ""
4330   [(const_int 0)]
4331 {
4332   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4333              ? gen_reg_rtx (V2DFmode)
4334              : operands[2];
       /* tmp = operand 1 shifted by two words, then reduce with operand 1.  */
4335   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4336   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4337   DONE;
4338 }
4339   [(set_attr "length" "8")
4340    (set_attr "type" "veccomplex")])
4341
     ;; V4SF reduction.  The split emits four insns: shift operand 1 by two
     ;; words and combine with operand 1, then shift that partial result by
     ;; three words and combine again into operand 0.  Before register
     ;; allocation three fresh pseudos are used; afterwards the two scratch
     ;; registers are used and the first one is reused for the second shift.
4342 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4343   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4344         (VEC_reduc:V4SF
4345          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4346          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4347    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4348    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4349   "VECTOR_UNIT_VSX_P (V4SFmode)"
4350   "#"
4351   ""
4352   [(const_int 0)]
4353 {
4354   rtx op0 = operands[0];
4355   rtx op1 = operands[1];
4356   rtx tmp2, tmp3, tmp4;
4357
4358   if (can_create_pseudo_p ())
4359     {
4360       tmp2 = gen_reg_rtx (V4SFmode);
4361       tmp3 = gen_reg_rtx (V4SFmode);
4362       tmp4 = gen_reg_rtx (V4SFmode);
4363     }
4364   else
4365     {
4366       tmp2 = operands[2];
4367       tmp3 = operands[3];
           /* tmp2 is dead after the first reduction step, so reuse it.  */
4368       tmp4 = tmp2;
4369     }
4370
4371   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4372   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4373   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4374   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4375   DONE;
4376 }
4377   [(set_attr "length" "16")
4378    (set_attr "type" "veccomplex")])
4379
4380 ;; Combiner patterns with the vector reduction patterns that knows we can get
4381 ;; to the top element of the V2DF array without doing an extract.
     ;; The split takes the high part of operand 1 directly as a DF value,
     ;; extracts element 1 into a DF temporary with vsx_extract_v2df, and
     ;; applies the scalar <VEC_reduc_rtx> operation to the two.
4382
4383 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4384   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4385         (vec_select:DF
4386          (VEC_reduc:V2DF
4387           (vec_concat:V2DF
4388            (vec_select:DF
4389             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4390             (parallel [(const_int 1)]))
4391            (vec_select:DF
4392             (match_dup 1)
4393             (parallel [(const_int 0)])))
4394           (match_dup 1))
4395          (parallel [(const_int 1)])))
4396    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4397   "VECTOR_UNIT_VSX_P (V2DFmode)"
4398   "#"
4399   ""
4400   [(const_int 0)]
4401 {
       /* hi is a DFmode view of operand 1; lo needs a real extract.  */
4402   rtx hi = gen_highpart (DFmode, operands[1]);
4403   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4404             ? gen_reg_rtx (DFmode)
4405             : operands[2];
4406
4407   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4408   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4409   DONE;
4410 }
4411   [(set_attr "length" "8")
4412    (set_attr "type" "veccomplex")])
4413
     ;; Scalar result of a V4SF reduction (element 3 of the reduced vector).
     ;; The split emits the same four-insn shift/combine sequence as the
     ;; vector V4SF reduction into a V4SF temporary, then converts that
     ;; temporary to the SF result with vsx_xscvspdp_scalar2.  Scratch
     ;; operand 4 is tied to operand 0 and holds the final vector value when
     ;; pseudos can no longer be created.
4414 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4415   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4416         (vec_select:SF
4417          (VEC_reduc:V4SF
4418           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4419           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4420          (parallel [(const_int 3)])))
4421    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4422    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4423    (clobber (match_scratch:V4SF 4 "=0,0"))]
4424   "VECTOR_UNIT_VSX_P (V4SFmode)"
4425   "#"
4426   ""
4427   [(const_int 0)]
4428 {
4429   rtx op0 = operands[0];
4430   rtx op1 = operands[1];
4431   rtx tmp2, tmp3, tmp4, tmp5;
4432
4433   if (can_create_pseudo_p ())
4434     {
4435       tmp2 = gen_reg_rtx (V4SFmode);
4436       tmp3 = gen_reg_rtx (V4SFmode);
4437       tmp4 = gen_reg_rtx (V4SFmode);
4438       tmp5 = gen_reg_rtx (V4SFmode);
4439     }
4440   else
4441     {
4442       tmp2 = operands[2];
4443       tmp3 = operands[3];
           /* tmp2 is dead after the first reduction step, so reuse it.  */
4444       tmp4 = tmp2;
4445       tmp5 = operands[4];
4446     }
4447
4448   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4449   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4450   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4451   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4452   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4453   DONE;
4454 }
4455   [(set_attr "length" "20")
4456    (set_attr "type" "veccomplex")])
4457
4458 \f
4459 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
     ;; Matches a load of a short constant into base register operand 0
     ;; followed by a vector load from (operand 0 + operand 3), and prints
     ;; both as one 8-byte "li" + indexed-load sequence.  Only enabled with
     ;; TARGET_P8_FUSION and without TARGET_P9_VECTOR.  In this form operand 0
     ;; is the first term of the address.
4460 (define_peephole
4461   [(set (match_operand:P 0 "base_reg_operand")
4462         (match_operand:P 1 "short_cint_operand"))
4463    (set (match_operand:VSX_M 2 "vsx_register_operand")
4464         (mem:VSX_M (plus:P (match_dup 0)
4465                            (match_operand:P 3 "int_reg_operand"))))]
4466   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4467   "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4468   [(set_attr "length" "8")
4469    (set_attr "type" "vecload")])
4470
     ;; Same fusion as the preceding peephole, for the address written with
     ;; the terms in the other order (operand 3 + operand 0).  The output
     ;; template is identical.
4471 (define_peephole
4472   [(set (match_operand:P 0 "base_reg_operand")
4473         (match_operand:P 1 "short_cint_operand"))
4474    (set (match_operand:VSX_M 2 "vsx_register_operand")
4475         (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4476                            (match_dup 0))))]
4477   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4478   "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4479   [(set_attr "length" "8")
4480    (set_attr "type" "vecload")])
4481
4482 \f
4483 ;; ISA 3.0 vector extend sign support
     ;; Sign extend from a V16QI source to the VSINT_84 destination mode,
     ;; kept as UNSPEC_VSX_SIGN_EXTEND and emitted as vextsb2<wd>.
4484
4485 (define_insn "vsx_sign_extend_qi_<mode>"
4486   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4487         (unspec:VSINT_84
4488          [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4489          UNSPEC_VSX_SIGN_EXTEND))]
4490   "TARGET_P9_VECTOR"
4491   "vextsb2<wd> %0,%1"
4492   [(set_attr "type" "vecexts")])
4493
     ;; Sign extend from a V8HI source to the VSINT_84 destination mode,
     ;; kept as UNSPEC_VSX_SIGN_EXTEND and emitted as vextsh2<wd>.
4494 (define_insn "vsx_sign_extend_hi_<mode>"
4495   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4496         (unspec:VSINT_84
4497          [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4498          UNSPEC_VSX_SIGN_EXTEND))]
4499   "TARGET_P9_VECTOR"
4500   "vextsh2<wd> %0,%1"
4501   [(set_attr "type" "vecexts")])
4502
     ;; Sign extend from a V4SI source to V2DI, kept as
     ;; UNSPEC_VSX_SIGN_EXTEND and emitted as vextsw2d.
4503 (define_insn "*vsx_sign_extend_si_v2di"
4504   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4505         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4506                      UNSPEC_VSX_SIGN_EXTEND))]
4507   "TARGET_P9_VECTOR"
4508   "vextsw2d %0,%1"
4509   [(set_attr "type" "vecexts")])
4510
4511 \f
4512 ;; ISA 3.0 Binary Floating-Point Support
4513
4514 ;; VSX Scalar Extract Exponent Quad-Precision
     ;; Source is an IEEE128 value and the DImode result is produced in an
     ;; Altivec register.  Note that the unspec code used is
     ;; UNSPEC_VSX_SXEXPDP, the same one as the double-precision pattern.
4515 (define_insn "xsxexpqp_<mode>"
4516   [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4517         (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4518          UNSPEC_VSX_SXEXPDP))]
4519   "TARGET_P9_VECTOR"
4520   "xsxexpqp %0,%1"
4521   [(set_attr "type" "vecmove")])
4522
4523 ;; VSX Scalar Extract Exponent Double-Precision
     ;; Source is a DF value in a VSX register; the DImode result goes to a
     ;; general register.  Requires TARGET_64BIT.
4524 (define_insn "xsxexpdp"
4525   [(set (match_operand:DI 0 "register_operand" "=r")
4526         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4527          UNSPEC_VSX_SXEXPDP))]
4528   "TARGET_P9_VECTOR && TARGET_64BIT"
4529   "xsxexpdp %0,%x1"
4530   [(set_attr "type" "integer")])
4531
4532 ;; VSX Scalar Extract Significand Quad-Precision
     ;; Source is an IEEE128 value; the TImode result is produced in an
     ;; Altivec register.
4533 (define_insn "xsxsigqp_<mode>"
4534   [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4535         (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4536          UNSPEC_VSX_SXSIG))]
4537   "TARGET_P9_VECTOR"
4538   "xsxsigqp %0,%1"
4539   [(set_attr "type" "vecmove")])
4540
4541 ;; VSX Scalar Extract Significand Double-Precision
     ;; Source is a DF value in a VSX register; the DImode result goes to a
     ;; general register.  Requires TARGET_64BIT.
4542 (define_insn "xsxsigdp"
4543   [(set (match_operand:DI 0 "register_operand" "=r")
4544         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4545          UNSPEC_VSX_SXSIG))]
4546   "TARGET_P9_VECTOR && TARGET_64BIT"
4547   "xsxsigdp %0,%x1"
4548   [(set_attr "type" "integer")])
4549
4550 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
     ;; Operand 1 is an IEEE128 value and operand 2 a DImode value; both live
     ;; in Altivec registers.  Emits the same xsiexpqp instruction as
     ;; xsiexpqp_<mode>, which takes operand 1 as TImode instead.
4551 (define_insn "xsiexpqpf_<mode>"
4552   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4553         (unspec:IEEE128
4554          [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4555           (match_operand:DI 2 "altivec_register_operand" "v")]
4556          UNSPEC_VSX_SIEXPQP))]
4557   "TARGET_P9_VECTOR"
4558   "xsiexpqp %0,%1,%2"
4559   [(set_attr "type" "vecmove")])
4560
4561 ;; VSX Scalar Insert Exponent Quad-Precision
     ;; Operand 1 is a TImode value and operand 2 a DImode value; both live
     ;; in Altivec registers.  The result is an IEEE128 value.
4562 (define_insn "xsiexpqp_<mode>"
4563   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4564         (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4565                          (match_operand:DI 2 "altivec_register_operand" "v")]
4566          UNSPEC_VSX_SIEXPQP))]
4567   "TARGET_P9_VECTOR"
4568   "xsiexpqp %0,%1,%2"
4569   [(set_attr "type" "vecmove")])
4570
4571 ;; VSX Scalar Insert Exponent Double-Precision
     ;; Both inputs are DImode values in general registers; the DF result is
     ;; produced in a VSX register.  Requires TARGET_64BIT.
4572 (define_insn "xsiexpdp"
4573   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4574         (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4575                     (match_operand:DI 2 "register_operand" "r")]
4576          UNSPEC_VSX_SIEXPDP))]
4577   "TARGET_P9_VECTOR && TARGET_64BIT"
4578   "xsiexpdp %x0,%1,%2"
4579   [(set_attr "type" "fpsimple")])
4580
4581 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
     ;; Same instruction as xsiexpdp, but operand 1 has DFmode.  Both inputs
     ;; are still constrained to general registers ("r"), so the DF argument
     ;; is expected there too.  Requires TARGET_64BIT.
4582 (define_insn "xsiexpdpf"
4583   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4584         (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4585                     (match_operand:DI 2 "register_operand" "r")]
4586          UNSPEC_VSX_SIEXPDP))]
4587   "TARGET_P9_VECTOR && TARGET_64BIT"
4588   "xsiexpdp %x0,%1,%2"
4589   [(set_attr "type" "fpsimple")])
4590
4591 ;; VSX Scalar Compare Exponents Double-Precision
     ;; Expands to two insns: a CCFP compare of the UNSPEC_VSX_SCMPEXPDP of
     ;; operands 1 and 2 against zero into a new CCFP pseudo (operand 3), and
     ;; a CMP_TEST (eq, lt, gt or unordered) of that register giving the SI
     ;; result in operand 0.
4592 (define_expand "xscmpexpdp_<code>"
4593   [(set (match_dup 3)
4594         (compare:CCFP
4595          (unspec:DF
4596           [(match_operand:DF 1 "vsx_register_operand" "wa")
4597            (match_operand:DF 2 "vsx_register_operand" "wa")]
4598           UNSPEC_VSX_SCMPEXPDP)
4599          (const_int 0)))
4600    (set (match_operand:SI 0 "register_operand" "=r")
4601         (CMP_TEST:SI (match_dup 3)
4602                      (const_int 0)))]
4603   "TARGET_P9_VECTOR"
4604 {
4605   operands[3] = gen_reg_rtx (CCFPmode);
4606 })
4607
     ;; Insn matching the compare half of the xscmpexpdp_<code> expansion:
     ;; sets a condition register field from the exponent comparison of
     ;; operands 1 and 2.  Operand 3 must be the constant zero.
4608 (define_insn "*xscmpexpdp"
4609   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4610         (compare:CCFP
4611          (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4612                      (match_operand:DF 2 "vsx_register_operand" "wa")]
4613           UNSPEC_VSX_SCMPEXPDP)
4614          (match_operand:SI 3 "zero_constant" "j")))]
4615   "TARGET_P9_VECTOR"
4616   "xscmpexpdp %0,%x1,%x2"
4617   [(set_attr "type" "fpcompare")])
4618
4619 ;; VSX Scalar Test Data Class Quad-Precision
4620 ;;  (Expansion for scalar_test_data_class (__ieee128, int))
4621 ;;   (Has side effect of setting the lt bit if operand 1 is negative,
4622 ;;    setting the eq bit if any of the conditions tested by operand 2
4623 ;;    are satisfied, and clearing the gt and unordered bits to zero.)
     ;; The test result is written to a new CCFP pseudo (operand 3); the SI
     ;; result in operand 0 is the eq test of that register.
4624 (define_expand "xststdcqp_<mode>"
4625   [(set (match_dup 3)
4626         (compare:CCFP
4627          (unspec:IEEE128
4628           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4629            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4630           UNSPEC_VSX_STSTDC)
4631          (const_int 0)))
4632    (set (match_operand:SI 0 "register_operand" "=r")
4633         (eq:SI (match_dup 3)
4634                (const_int 0)))]
4635   "TARGET_P9_VECTOR"
4636 {
4637   operands[3] = gen_reg_rtx (CCFPmode);
4638 })
4639
4640 ;; VSX Scalar Test Data Class Double- and Single-Precision
4641 ;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4642 ;;   if any of the conditions tested by operand 2 are satisfied.
4643 ;;   The gt and unordered bits are cleared to zero.)
     ;; The test result is written to a new CCFP pseudo (operand 3); the SI
     ;; result in operand 0 is the eq test of that register.  Operand 4 is
     ;; the SImode zero that the compare is made against.
4644 (define_expand "xststdc<Fvsx>"
4645   [(set (match_dup 3)
4646         (compare:CCFP
4647          (unspec:SFDF
4648           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4649            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4650           UNSPEC_VSX_STSTDC)
4651          (match_dup 4)))
4652    (set (match_operand:SI 0 "register_operand" "=r")
4653         (eq:SI (match_dup 3)
4654                (const_int 0)))]
4655   "TARGET_P9_VECTOR"
4656 {
4657   operands[3] = gen_reg_rtx (CCFPmode);
4658   operands[4] = CONST0_RTX (SImode);
4659 })
4660
4661 ;; The VSX Scalar Test Negative Quad-Precision
     ;; Uses the test-data-class unspec with a condition mask of zero and
     ;; reads back only the lt bit of the new CCFP pseudo (operand 2) as the
     ;; SI result in operand 0.
4662 (define_expand "xststdcnegqp_<mode>"
4663   [(set (match_dup 2)
4664         (compare:CCFP
4665          (unspec:IEEE128
4666           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4667            (const_int 0)]
4668           UNSPEC_VSX_STSTDC)
4669          (const_int 0)))
4670    (set (match_operand:SI 0 "register_operand" "=r")
4671         (lt:SI (match_dup 2)
4672                (const_int 0)))]
4673   "TARGET_P9_VECTOR"
4674 {
4675   operands[2] = gen_reg_rtx (CCFPmode);
4676 })
4677
4678 ;; The VSX Scalar Test Negative Double- and Single-Precision
     ;; Uses the test-data-class unspec with a condition mask of zero and
     ;; reads back only the lt bit of the new CCFP pseudo (operand 2) as the
     ;; SI result in operand 0.  Operand 3 is the SImode zero that the
     ;; compare is made against.
4679 (define_expand "xststdcneg<Fvsx>"
4680   [(set (match_dup 2)
4681         (compare:CCFP
4682          (unspec:SFDF
4683           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4684            (const_int 0)]
4685           UNSPEC_VSX_STSTDC)
4686          (match_dup 3)))
4687    (set (match_operand:SI 0 "register_operand" "=r")
4688         (lt:SI (match_dup 2)
4689                (const_int 0)))]
4690   "TARGET_P9_VECTOR"
4691 {
4692   operands[2] = gen_reg_rtx (CCFPmode);
4693   operands[3] = CONST0_RTX (SImode);
4694 })
4695
     ;; Insn matching the compare half of the quad-precision test data class
     ;; expansions: sets a condition register field from xststdcqp of
     ;; operand 1 with the 7-bit condition mask operand 2.
     ;; NOTE(review): operand 0 has an empty predicate, whereas *xscmpexpdp
     ;; uses "cc_reg_operand" for the same kind of output -- confirm whether
     ;; the difference is intended.
4696 (define_insn "*xststdcqp_<mode>"
4697   [(set (match_operand:CCFP 0 "" "=y")
4698         (compare:CCFP
4699          (unspec:IEEE128
4700           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4701            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4702           UNSPEC_VSX_STSTDC)
4703          (const_int 0)))]
4704   "TARGET_P9_VECTOR"
4705   "xststdcqp %0,%1,%2"
4706   [(set_attr "type" "fpcompare")])
4707
4708 (define_insn "*xststdc<Fvsx>"
     ;; Matches a CCFP compare of the SFDF UNSPEC_VSX_STSTDC test against
     ;; operand 3, which must satisfy zero_constant.  Operand 2 is the constant
     ;; mask and must satisfy u7bit_cint_operand.
4709   [(set (match_operand:CCFP 0 "" "=y")
4710         (compare:CCFP
4711          (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4712                        (match_operand:SI 2 "u7bit_cint_operand" "n")]
4713           UNSPEC_VSX_STSTDC)
4714          (match_operand:SI 3 "zero_constant" "j")))]
4715   "TARGET_P9_VECTOR"
4716   "xststdc<Fvsx> %0,%x1,%2"
4717   [(set_attr "type" "fpcompare")])
4718
4719 ;; VSX Vector Extract Exponent Double and Single Precision
     ;; Operand 0 (result) and operand 1 (source) are VSX registers of the
     ;; same VSX_F mode.
4720 (define_insn "xvxexp<VSs>"
4721   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4722         (unspec:VSX_F
4723          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4724          UNSPEC_VSX_VXEXP))]
4725   "TARGET_P9_VECTOR"
4726   "xvxexp<VSs> %x0,%x1"
4727   [(set_attr "type" "vecsimple")])
4728
4729 ;; VSX Vector Extract Significand Double and Single Precision
     ;; Operand 0 (result) and operand 1 (source) are VSX registers of the
     ;; same VSX_F mode.
4730 (define_insn "xvxsig<VSs>"
4731   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4732         (unspec:VSX_F
4733          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4734          UNSPEC_VSX_VXSIG))]
4735   "TARGET_P9_VECTOR"
4736   "xvxsig<VSs> %x0,%x1"
4737   [(set_attr "type" "vecsimple")])
4738
4739 ;; VSX Vector Insert Exponent Double and Single Precision
     ;; Operand 0 is the result; operands 1 and 2 are the two VSX_F source
     ;; registers, passed to the instruction in that order.
4740 (define_insn "xviexp<VSs>"
4741   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4742         (unspec:VSX_F
4743          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4744           (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4745          UNSPEC_VSX_VIEXP))]
4746   "TARGET_P9_VECTOR"
4747   "xviexp<VSs> %x0,%x1,%x2"
4748   [(set_attr "type" "vecsimple")])
4749
4750 ;; VSX Vector Test Data Class Double and Single Precision
4751 ;; The corresponding elements of the result vector are all ones
4752 ;; if any of the conditions tested by operand 2 are satisfied.
     ;; Operand 1 is the VSX_F source; the result has the <VSI> mode.
4753 (define_insn "xvtstdc<VSs>"
4754   [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4755         (unspec:<VSI>
4756          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4757           (match_operand:SI 2 "u7bit_cint_operand" "n")]
4758          UNSPEC_VSX_VTSTDC))]
4759   "TARGET_P9_VECTOR"
4760   "xvtstdc<VSs> %x0,%x1,%2"
4761   [(set_attr "type" "vecsimple")])
4762
4763 ;; ISA 3.0 String Operations Support
4764
4765 ;; Compare vectors producing a vector result and a predicate, setting CR6
4766 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4767 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
4768 ;; need to match v4sf, v2df, or v2di modes because those are expanded
4769 ;; to use Power8 instructions.
     ;; Both sets (CR6 and the vector result in operand 0) come from the single
     ;; record-form instruction vcmpne<VSX_EXTRACT_WIDTH>. emitted below.
4770 (define_insn "*vsx_ne_<mode>_p"
4771   [(set (reg:CC CR6_REGNO)
4772         (unspec:CC
4773          [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4774                  (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4775          UNSPEC_PREDICATE))
4776    (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4777         (ne:VSX_EXTRACT_I (match_dup 1)
4778                           (match_dup 2)))]
4779   "TARGET_P9_VECTOR"
4780   "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4781   [(set_attr "type" "vecsimple")])
4782
4783 (define_insn "*vector_nez_<mode>_p"
     ;; Record-form vcmpnez: sets CR6 and the VI-mode vector result (operand 0)
     ;; from the same UNSPEC_NEZ_P of operands 1 and 2.
4784   [(set (reg:CC CR6_REGNO)
4785         (unspec:CC [(unspec:VI
4786                      [(match_operand:VI 1 "gpc_reg_operand" "v")
4787                       (match_operand:VI 2 "gpc_reg_operand" "v")]
4788                      UNSPEC_NEZ_P)]
4789          UNSPEC_PREDICATE))
4790    (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4791         (unspec:VI [(match_dup 1)
4792                     (match_dup 2)]
4793          UNSPEC_NEZ_P))]
4794   "TARGET_P9_VECTOR"
4795   "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4796   [(set_attr "type" "vecsimple")])
4797
4798 ;; Return first position of match between vectors
     ;; Operand 0 is the SImode result; operands 1 and 2 are the vectors.
4799 (define_expand "first_match_index_<mode>"
4800   [(match_operand:SI 0 "register_operand")
4801    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4802                (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4803   UNSPEC_VSX_FIRST_MATCH_INDEX)]
4804   "TARGET_P9_VECTOR"
4805 {
4806   int sh;
4807
4808   rtx cmp_result = gen_reg_rtx (<MODE>mode);
4809   rtx not_result = gen_reg_rtx (<MODE>mode);
4810
       /* Complement the vcmpnez result, then count with vctzlsbb.  */
4811   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4812                                              operands[2]));
4813   emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4814
       /* Shift count: element size in bytes divided by two.  */
4815   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4816
4817   if (<MODE>mode == V16QImode)
4818     emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4819   else
4820     {
           /* For wider elements the vctzlsbb count is shifted right by SH
              before being stored in operand 0.  */
4821       rtx tmp = gen_reg_rtx (SImode);
4822       emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4823       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4824     }
4825   DONE;
4826 })
4827
4828 ;; Return first position of match between vectors or end of string (EOS)
     ;; Operand 0 is the SImode result; operands 1 and 2 are the vectors.
4829 (define_expand "first_match_or_eos_index_<mode>"
4830   [(match_operand:SI 0 "register_operand")
4831    (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4832    (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4833   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4834   "TARGET_P9_VECTOR"
4835 {
4836   int sh;
4837   rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4838   rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4839   rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4840   rtx and_result = gen_reg_rtx (<MODE>mode);
4841   rtx result = gen_reg_rtx (<MODE>mode);
4842   rtx vzero = gen_reg_rtx (<MODE>mode);
4843
4844   /* Vector with zeros in elements that correspond to zeros in operands.  */
4845   emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4846   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4847   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4848   emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4849
4850   /* Vector with ones in elements that do not match.  */
4851   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4852                                              operands[2]));
4853
4854   /* Create vector with ones in elements where there was a zero in one of
4855      the source elements or the elements that match.  */
4856   emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
       /* Shift count: element size in bytes divided by two.  */
4857   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4858
4859   if (<MODE>mode == V16QImode)
4860     emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4861   else
4862     {
           /* For wider elements the vctzlsbb count is shifted right by SH.  */
4863       rtx tmp = gen_reg_rtx (SImode);
4864       emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4865       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4866     }
4867   DONE;
4868 })
4869
4870 ;; Return first position of mismatch between vectors
     ;; Operand 0 is the SImode result; operands 1 and 2 are the vectors.
4871 (define_expand "first_mismatch_index_<mode>"
4872   [(match_operand:SI 0 "register_operand")
4873    (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4874    (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4875   UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4876   "TARGET_P9_VECTOR"
4877 {
4878   int sh;
4879   rtx cmp_result = gen_reg_rtx (<MODE>mode);
4880
       /* The vcmpne result is counted directly with vctzlsbb.  */
4881   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4882                                             operands[2]));
       /* Shift count: element size in bytes divided by two.  */
4883   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4884
4885   if (<MODE>mode == V16QImode)
4886     emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4887   else
4888     {
           /* For wider elements the vctzlsbb count is shifted right by SH.  */
4889       rtx tmp = gen_reg_rtx (SImode);
4890       emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4891       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4892     }
4893   DONE;
4894 })
4895
4896 ;; Return first position of mismatch between vectors or end of string (EOS)
     ;; Operand 0 is the SImode result; operands 1 and 2 are the vectors.
4897 (define_expand "first_mismatch_or_eos_index_<mode>"
4898   [(match_operand:SI 0 "register_operand")
4899    (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4900    (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4901   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4902   "TARGET_P9_VECTOR"
4903 {
4904   int sh;
4905   rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4906   rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4907   rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4908   rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4909   rtx and_result = gen_reg_rtx (<MODE>mode);
4910   rtx result = gen_reg_rtx (<MODE>mode);
4911   rtx vzero = gen_reg_rtx (<MODE>mode);
4912
4913   /* Vector with zeros in elements that correspond to zeros in operands.  */
4914   emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4915
4916   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4917   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4918   emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4919
4920   /* Vector with ones in elements that match: the complement of the
          vcmpnez result.  */
4921   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4922                                              operands[2]));
4923   emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4924
4925   /* Create vector with ones in elements where there was a zero in one of
4926      the source elements or the elements did not match.  */
4927   emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
       /* Shift count: element size in bytes divided by two.  */
4928   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4929
4930   if (<MODE>mode == V16QImode)
4931     emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4932   else
4933     {
           /* For wider elements the vctzlsbb count is shifted right by SH.  */
4934       rtx tmp = gen_reg_rtx (SImode);
4935       emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4936       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4937     }
4938   DONE;
4939 })
4940
4941 ;; Load VSX Vector with Length
     ;; Operand 0 is the V16QI result, operand 1 the DImode address and
     ;; operand 2 the DImode length.  The length is first shifted left by 56
     ;; into operand 3, which is what the UNSPEC_LXVL load consumes.
4942 (define_expand "lxvl"
4943   [(set (match_dup 3)
4944         (ashift:DI (match_operand:DI 2 "register_operand")
4945                    (const_int 56)))
4946    (set (match_operand:V16QI 0 "vsx_register_operand")
4947         (unspec:V16QI
4948          [(match_operand:DI 1 "gpc_reg_operand")
4949           (mem:V16QI (match_dup 1))
4950           (match_dup 3)]
4951          UNSPEC_LXVL))]
4952   "TARGET_P9_VECTOR && TARGET_64BIT"
4953 {
       /* Fresh DImode pseudo for the shifted length.  */
4954   operands[3] = gen_reg_rtx (DImode);
4955 })
4956
4957 (define_insn "*lxvl"
     ;; Matches the load set generated by the "lxvl" expander.  Operand 1 is
     ;; the address (constraint "b"); operand 2 is the already-shifted length.
4958   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4959         (unspec:V16QI
4960          [(match_operand:DI 1 "gpc_reg_operand" "b")
4961           (mem:V16QI (match_dup 1))
4962           (match_operand:DI 2 "register_operand" "r")]
4963          UNSPEC_LXVL))]
4964   "TARGET_P9_VECTOR && TARGET_64BIT"
4965   "lxvl %x0,%1,%2"
4966   [(set_attr "type" "vecload")])
4967
4968 (define_insn "lxvll"
     ;; Emits lxvll.  Operand 1 is the address (constraint "b") and operand 2
     ;; the length register, passed through unchanged; the xl_len_r expander
     ;; shifts the length left by 56 before calling this pattern.
     ;; NOTE(review): unlike "*lxvl" the condition has no TARGET_64BIT test
     ;; although the operands are DImode -- confirm this is intended.
4969   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4970         (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4971                        (mem:V16QI (match_dup 1))
4972                        (match_operand:DI 2 "register_operand" "r")]
4973                       UNSPEC_LXVLL))]
4974   "TARGET_P9_VECTOR"
4975   "lxvll %x0,%1,%2"
4976   [(set_attr "type" "vecload")])
4977
4978 ;; Expand for builtin xl_len_r
     ;; Operand 0 is the V16QI result, operand 1 the DImode address and
     ;; operand 2 the DImode length.
     ;; NOTE(review): the condition is empty although the expander emits
     ;; lxvll, whose condition is TARGET_P9_VECTOR -- confirm that callers
     ;; only use this expander when that holds.
4979 (define_expand "xl_len_r"
4980   [(match_operand:V16QI 0 "vsx_register_operand")
4981    (match_operand:DI 1 "register_operand")
4982    (match_operand:DI 2 "register_operand")]
4983   ""
4984 {
4985   rtx shift_mask = gen_reg_rtx (V16QImode);
4986   rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4987   rtx tmp = gen_reg_rtx (DImode);
4988
       /* Build an lvsl permute mask from the length, load with lxvll using
          the length shifted left by 56, then permute the loaded vector with
          the mask into operand 0.  */
4989   emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4990   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4991   emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4992   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4993              shift_mask));
4994   DONE;
4995 })
4996
4997 (define_insn "stxvll"
     ;; Emits stxvll.  Operand 0 is the V16QI value to store, operand 1 the
     ;; address (constraint "b") and operand 2 the length register, passed
     ;; through unchanged; the xst_len_r expander shifts the length left by
     ;; 56 before calling this pattern.
4998   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4999         (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5000                        (mem:V16QI (match_dup 1))
5001                        (match_operand:DI 2 "register_operand" "r")]
5002                       UNSPEC_STXVLL))]
5003   "TARGET_P9_VECTOR"
5004   "stxvll %x0,%1,%2"
5005   [(set_attr "type" "vecstore")])
5006
5007 ;; Store VSX Vector with Length
     ;; Operand 0 is the V16QI value to store, operand 1 the DImode address
     ;; and operand 2 the DImode length.  The length is first shifted left by
     ;; 56 into operand 3, which is what the UNSPEC_STXVL store consumes.
5008 (define_expand "stxvl"
5009   [(set (match_dup 3)
5010         (ashift:DI (match_operand:DI 2 "register_operand")
5011                    (const_int 56)))
5012    (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5013         (unspec:V16QI
5014          [(match_operand:V16QI 0 "vsx_register_operand")
5015           (mem:V16QI (match_dup 1))
5016           (match_dup 3)]
5017          UNSPEC_STXVL))]
5018   "TARGET_P9_VECTOR && TARGET_64BIT"
5019 {
       /* Fresh DImode pseudo for the shifted length.  */
5020   operands[3] = gen_reg_rtx (DImode);
5021 })
5022
5023 (define_insn "*stxvl"
     ;; Matches the store set generated by the "stxvl" expander.  Operand 1 is
     ;; the address (constraint "b"); operand 2 is the already-shifted length.
5024   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5025         (unspec:V16QI
5026          [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5027           (mem:V16QI (match_dup 1))
5028           (match_operand:DI 2 "register_operand" "r")]
5029          UNSPEC_STXVL))]
5030   "TARGET_P9_VECTOR && TARGET_64BIT"
5031   "stxvl %x0,%1,%2"
5032   [(set_attr "type" "vecstore")])
5033
5034 ;; Expand for builtin xst_len_r
     ;; Operand 0 is the V16QI value to store, operand 1 the DImode address
     ;; and operand 2 the DImode length.  The condition is that of the stxvll
     ;; insn this expander emits; it previously held the unspec name
     ;; UNSPEC_XST_LEN_R, which is not a target test.  Operands carry no
     ;; constraints, as in the xl_len_r expander.
5035 (define_expand "xst_len_r"
5036   [(match_operand:V16QI 0 "vsx_register_operand")
5037    (match_operand:DI 1 "register_operand")
5038    (match_operand:DI 2 "register_operand")]
5039   "TARGET_P9_VECTOR"
5040 {
5041   rtx shift_mask = gen_reg_rtx (V16QImode);
5042   rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5043   rtx tmp = gen_reg_rtx (DImode);
5044
       /* Build an lvsr permute mask from the length, permute the source
          vector with it, then store the permuted vector with stxvll using
          the length shifted left by 56.  */
5045   emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5046   emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5047              shift_mask));
5048   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5049   emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5050   DONE;
5051 })
5052
5053 ;; Vector Compare Not Equal Byte (expressed in RTL as the NOT of an EQ)
5054 (define_insn "vcmpneb"
5055   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5056          (not:V16QI
5057            (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5058                      (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5059   "TARGET_P9_VECTOR"
5060   "vcmpneb %0,%1,%2"
5061   [(set_attr "type" "vecsimple")])
5062
5063 ;; Vector Compare Not Equal or Zero Byte (represented as an unspec)
5064 (define_insn "vcmpnezb"
5065   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5066         (unspec:V16QI
5067          [(match_operand:V16QI 1 "altivec_register_operand" "v")
5068           (match_operand:V16QI 2 "altivec_register_operand" "v")]
5069          UNSPEC_VCMPNEZB))]
5070   "TARGET_P9_VECTOR"
5071   "vcmpnezb %0,%1,%2"
5072   [(set_attr "type" "vecsimple")])
5073
5074 ;; Vector Compare Not Equal Half Word (expressed in RTL as the NOT of an EQ)
5075 (define_insn "vcmpneh"
5076   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5077         (not:V8HI
5078           (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5079                    (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5080   "TARGET_P9_VECTOR"
5081   "vcmpneh %0,%1,%2"
5082   [(set_attr "type" "vecsimple")])
5083
5084 ;; Vector Compare Not Equal or Zero Half Word (represented as an unspec)
5085 (define_insn "vcmpnezh"
5086   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5087         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5088                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
5089          UNSPEC_VCMPNEZH))]
5090   "TARGET_P9_VECTOR"
5091   "vcmpnezh %0,%1,%2"
5092   [(set_attr "type" "vecsimple")])
5093
5094 ;; Vector Compare Not Equal Word (expressed in RTL as the NOT of an EQ)
5095 (define_insn "vcmpnew"
5096   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5097         (not:V4SI
5098           (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5099                    (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5100   "TARGET_P9_VECTOR"
5101   "vcmpnew %0,%1,%2"
5102   [(set_attr "type" "vecsimple")])
5103
5104 ;; Vector Compare Not Equal or Zero Word (represented as an unspec)
5105 (define_insn "vcmpnezw"
5106   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5107         (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5108                       (match_operand:V4SI 2 "altivec_register_operand" "v")]
5109          UNSPEC_VCMPNEZW))]
5110   "TARGET_P9_VECTOR"
5111   "vcmpnezw %0,%1,%2"
5112   [(set_attr "type" "vecsimple")])
5113
5114 ;; Vector Count Leading Zero Least-Significant Bits Byte
     ;; Operand 0 is the SImode count in a GPR; operand 1 is the V16QI source.
5115 (define_insn "vclzlsbb"
5116   [(set (match_operand:SI 0 "register_operand" "=r")
5117         (unspec:SI
5118          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
5119          UNSPEC_VCLZLSBB))]
5120   "TARGET_P9_VECTOR"
5121   "vclzlsbb %0,%1"
5122   [(set_attr "type" "vecsimple")])
5123
5124 ;; Vector Count Trailing Zero Least-Significant Bits Byte
     ;; Operand 0 is the SImode count in a GPR.  The pattern is instantiated
     ;; for every VSX_EXTRACT_I mode, but the same vctzlsbb instruction is
     ;; emitted for all of them.
5125 (define_insn "vctzlsbb_<mode>"
5126   [(set (match_operand:SI 0 "register_operand" "=r")
5127         (unspec:SI
5128          [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5129          UNSPEC_VCTZLSBB))]
5130   "TARGET_P9_VECTOR"
5131   "vctzlsbb %0,%1"
5132   [(set_attr "type" "vecsimple")])
5133
5134 ;; Vector Extract Unsigned Byte Left-Indexed
     ;; Operand 0 is the SImode result, operand 1 the SImode index in a GPR
     ;; and operand 2 the V16QI source vector.
5135 (define_insn "vextublx"
5136   [(set (match_operand:SI 0 "register_operand" "=r")
5137         (unspec:SI
5138          [(match_operand:SI 1 "register_operand" "r")
5139           (match_operand:V16QI 2 "altivec_register_operand" "v")]
5140          UNSPEC_VEXTUBLX))]
5141   "TARGET_P9_VECTOR"
5142   "vextublx %0,%1,%2"
5143   [(set_attr "type" "vecsimple")])
5144
5145 ;; Vector Extract Unsigned Byte Right-Indexed
     ;; Operand 0 is the SImode result, operand 1 the SImode index in a GPR
     ;; and operand 2 the V16QI source vector.
5146 (define_insn "vextubrx"
5147   [(set (match_operand:SI 0 "register_operand" "=r")
5148         (unspec:SI
5149          [(match_operand:SI 1 "register_operand" "r")
5150           (match_operand:V16QI 2 "altivec_register_operand" "v")]
5151          UNSPEC_VEXTUBRX))]
5152   "TARGET_P9_VECTOR"
5153   "vextubrx %0,%1,%2"
5154   [(set_attr "type" "vecsimple")])
5155
5156 ;; Vector Extract Unsigned Half Word Left-Indexed
     ;; Operand 0 is the SImode result, operand 1 the SImode index in a GPR
     ;; and operand 2 the V8HI source vector.
5157 (define_insn "vextuhlx"
5158   [(set (match_operand:SI 0 "register_operand" "=r")
5159         (unspec:SI
5160          [(match_operand:SI 1 "register_operand" "r")
5161           (match_operand:V8HI 2 "altivec_register_operand" "v")]
5162          UNSPEC_VEXTUHLX))]
5163   "TARGET_P9_VECTOR"
5164   "vextuhlx %0,%1,%2"
5165   [(set_attr "type" "vecsimple")])
5166
5167 ;; Vector Extract Unsigned Half Word Right-Indexed
     ;; Operand 0 is the SImode result, operand 1 the SImode index in a GPR
     ;; and operand 2 the V8HI source vector.
5168 (define_insn "vextuhrx"
5169   [(set (match_operand:SI 0 "register_operand" "=r")
5170         (unspec:SI
5171          [(match_operand:SI 1 "register_operand" "r")
5172           (match_operand:V8HI 2 "altivec_register_operand" "v")]
5173          UNSPEC_VEXTUHRX))]
5174   "TARGET_P9_VECTOR"
5175   "vextuhrx %0,%1,%2"
5176   [(set_attr "type" "vecsimple")])
5177
5178 ;; Vector Extract Unsigned Word Left-Indexed
     ;; Operand 0 is the SImode result, operand 1 the SImode index in a GPR
     ;; and operand 2 the V4SI source vector.
5179 (define_insn "vextuwlx"
5180   [(set (match_operand:SI 0 "register_operand" "=r")
5181         (unspec:SI
5182          [(match_operand:SI 1 "register_operand" "r")
5183           (match_operand:V4SI 2 "altivec_register_operand" "v")]
5184          UNSPEC_VEXTUWLX))]
5185   "TARGET_P9_VECTOR"
5186   "vextuwlx %0,%1,%2"
5187   [(set_attr "type" "vecsimple")])
5188
5189 ;; Vector Extract Unsigned Word Right-Indexed
     ;; Operand 0 is the SImode result, operand 1 the SImode index in a GPR
     ;; and operand 2 the V4SI source vector.
5190 (define_insn "vextuwrx"
5191   [(set (match_operand:SI 0 "register_operand" "=r")
5192         (unspec:SI
5193          [(match_operand:SI 1 "register_operand" "r")
5194           (match_operand:V4SI 2 "altivec_register_operand" "v")]
5195          UNSPEC_VEXTUWRX))]
5196   "TARGET_P9_VECTOR"
5197   "vextuwrx %0,%1,%2"
5198   [(set_attr "type" "vecsimple")])
5199
5200 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
5201 ;; endian version needs to adjust the byte number, and the V4SI element in
5202 ;; vinsert4b.
5203 (define_insn "extract4b"
     ;; Emits xxextractuw.  Operand 0 is the V2DI result in a VSX register,
     ;; operand 1 the V16QI source and operand 2 a constant byte offset
     ;; accepted by const_0_to_12_operand.  The output operand now carries the
     ;; "=wa" constraint, like every other VSX output in this file.
5204   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5205        (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5206                      (match_operand:QI 2 "const_0_to_12_operand" "n")]
5207                     UNSPEC_XXEXTRACTUW))]
5208   "TARGET_P9_VECTOR"
5209 {
       /* When the element order is not big endian the byte offset is
          mirrored (12 - offset) before the instruction is printed.  */
5210   if (!VECTOR_ELT_ORDER_BIG)
5211     operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5212
5213   return "xxextractuw %x0,%x1,%2";
5214 })
5215
5216 (define_expand "insert4b"
     ;; Operand 0 is the V16QI result, operand 1 the V4SI vector supplying the
     ;; word, operand 2 the V16QI vector inserted into and operand 3 a constant
     ;; byte offset accepted by const_0_to_12_operand.
5217   [(set (match_operand:V16QI 0 "vsx_register_operand")
5218         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5219                        (match_operand:V16QI 2 "vsx_register_operand")
5220                        (match_operand:QI 3 "const_0_to_12_operand")]
5221                    UNSPEC_XXINSERTW))]
5222   "TARGET_P9_VECTOR"
5223 {
5224   if (!VECTOR_ELT_ORDER_BIG)
5225     {
           /* Not big-endian element order: permute operand 1 with xxpermdi
              into a fresh V4SI pseudo and mirror the byte offset
              (12 - offset).  */
5226       rtx op1 = operands[1];
5227       rtx v4si_tmp = gen_reg_rtx (V4SImode);
5228       emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5229       operands[1] = v4si_tmp;
5230       operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5231     }
5232 })
5233
5234 (define_insn "*insert4b_internal"
     ;; Matches the set produced by the "insert4b" expander.  Operand 2 is
     ;; tied to the output register (constraint "0"), so only operands 0, 1
     ;; and 3 appear in the xxinsertw template.
5235   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5236         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5237                        (match_operand:V16QI 2 "vsx_register_operand" "0")
5238                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
5239                    UNSPEC_XXINSERTW))]
5240   "TARGET_P9_VECTOR"
5241   "xxinsertw %x0,%x1,%3"
5242   [(set_attr "type" "vecperm")])
5243
5244
5245 ;; Generate vector extract four float 32 values from left four elements
5246 ;; of eight element vector of float 16 values.
     ;; Operand 0 is the V4SF result; operand 1 is the V8HI source.
5247 (define_expand "vextract_fp_from_shorth"
5248   [(set (match_operand:V4SF 0 "register_operand" "=wa")
5249         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5250    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5251   "TARGET_P9_VECTOR"
5252 {
       /* Byte values that become the V16QI permute mask built below.  */
5253   int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5254   int i;
5255
5256   rtx rvals[16];
5257   rtx mask = gen_reg_rtx (V16QImode);
5258   rtx tmp = gen_reg_rtx (V16QImode);
5259   rtvec v;
5260
5261   for (i = 0; i < 16; i++)
5262     rvals[i] = GEN_INT (vals[i]);
5263
5264   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5265      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5266      src half words 0,1,2,3 for the conversion instruction.  */
       /* NOTE(review): the permute is emitted through the
          altivec_vperm_v8hiv16qi pattern, not an xxperm pattern.  */
5267   v = gen_rtvec_v (16, rvals);
5268   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5269   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5270                                           operands[1], mask));
5271   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5272   DONE;
5273 })
5274
5275 ;; Generate vector extract four float 32 values from right four elements
5276 ;; of eight element vector of float 16 values.
     ;; Operand 0 is the V4SF result; operand 1 is the V8HI source.
5277 (define_expand "vextract_fp_from_shortl"
5278   [(set (match_operand:V4SF 0 "register_operand" "=wa")
5279         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5280         UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5281   "TARGET_P9_VECTOR"
5282 {
       /* Byte values that become the V16QI permute mask built below.  */
5283   int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5284   int i;
5285   rtx rvals[16];
5286   rtx mask = gen_reg_rtx (V16QImode);
5287   rtx tmp = gen_reg_rtx (V16QImode);
5288   rtvec v;
5289
5290   for (i = 0; i < 16; i++)
5291     rvals[i] = GEN_INT (vals[i]);
5292
5293   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5294      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5295      src half words 4,5,6,7 for the conversion instruction.  */
       /* NOTE(review): the permute is emitted through the
          altivec_vperm_v8hiv16qi pattern, not an xxperm pattern.  */
5296   v = gen_rtvec_v (16, rvals);
5297   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5298   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5299                                           operands[1], mask));
5300   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5301   DONE;
5302 })
5303
5304 ;; Support for ISA 3.0 vector byte reverse
5305
5306 ;; Swap all bytes within a vector
5307 (define_insn "p9_xxbrq_v1ti"
5308   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5309         (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5310   "TARGET_P9_VECTOR"
5311   "xxbrq %x0,%x1"
5312   [(set_attr "type" "vecperm")])
5313
5314 (define_expand "p9_xxbrq_v16qi"
     ;; V16QI wrapper around p9_xxbrq_v1ti: operand 1 is viewed as V1TI, the
     ;; swap goes into a fresh V1TI pseudo, and its V16QI lowpart is moved to
     ;; operand 0.
5315   [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5316    (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5317   "TARGET_P9_VECTOR"
5318 {
5319   rtx op0 = gen_reg_rtx (V1TImode);
5320   rtx op1 = gen_lowpart (V1TImode, operands[1]);
5321   emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5322   emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5323   DONE;
5324 })
5325
5326 ;; Swap all bytes in each 64-bit element
     ;; Expressed as a V2DI bswap and emitted as xxbrd.
5327 (define_insn "p9_xxbrd_v2di"
5328   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5329         (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5330   "TARGET_P9_VECTOR"
5331   "xxbrd %x0,%x1"
5332   [(set_attr "type" "vecperm")])
5333
5334 (define_expand "p9_xxbrd_v2df"
     ;; V2DF wrapper around p9_xxbrd_v2di: operand 1 is viewed as V2DI, the
     ;; swap goes into a fresh V2DI pseudo, and its V2DF lowpart is moved to
     ;; operand 0.
5335   [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5336    (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5337   "TARGET_P9_VECTOR"
5338 {
5339   rtx op0 = gen_reg_rtx (V2DImode);
5340   rtx op1 = gen_lowpart (V2DImode, operands[1]);
5341   emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5342   emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5343   DONE;
5344 })
5345
5346 ;; Swap all bytes in each 32-bit element
     ;; Expressed as a V4SI bswap and emitted as xxbrw.
5347 (define_insn "p9_xxbrw_v4si"
5348   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5349         (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5350   "TARGET_P9_VECTOR"
5351   "xxbrw %x0,%x1"
5352   [(set_attr "type" "vecperm")])
5353
5354 (define_expand "p9_xxbrw_v4sf"
     ;; V4SF wrapper around p9_xxbrw_v4si: operand 1 is viewed as V4SI, the
     ;; swap goes into a fresh V4SI pseudo, and its V4SF lowpart is moved to
     ;; operand 0.
5355   [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5356    (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5357   "TARGET_P9_VECTOR"
5358 {
5359   rtx op0 = gen_reg_rtx (V4SImode);
5360   rtx op1 = gen_lowpart (V4SImode, operands[1]);
5361   emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5362   emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5363   DONE;
5364 })
5365
5366 ;; Swap all bytes in each element of vector
     ;; With TARGET_P9_VECTOR this emits the matching p9_xxbr* pattern;
     ;; otherwise it emits a vperm of operand 1 with itself using the selector
     ;; returned by swap_endian_selector_for_mode.
5367 (define_expand "revb_<mode>"
5368   [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5369    (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5370   ""
5371 {
5372   if (TARGET_P9_VECTOR)
5373     emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5374   else
5375     {
5376       /* Want to have the elements in reverse order relative
5377          to the endian mode in use, i.e. in LE mode, put elements
5378          in BE order.  */
5379       rtx sel = swap_endian_selector_for_mode(<MODE>mode);
5380       emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5381                                            operands[1], sel));
5382     }
5383
5384   DONE;
5385 })
5386
5387 ;; Reversing bytes in vector char is just a NOP.
     ;; The expander emits a plain move of operand 1 to operand 0 and ends with
     ;; DONE, so the bswap template below is never emitted.
5388 (define_expand "revb_v16qi"
5389   [(set (match_operand:V16QI 0 "vsx_register_operand")
5390         (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5391   ""
5392 {
5393   emit_move_insn (operands[0], operands[1]);
5394   DONE;
5395 })
5396
5397 ;; Swap all bytes in each 16-bit element
     ;; Expressed as a V8HI bswap and emitted as xxbrh.
5398 (define_insn "p9_xxbrh_v8hi"
5399   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5400         (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5401   "TARGET_P9_VECTOR"
5402   "xxbrh %x0,%x1"
5403   [(set_attr "type" "vecperm")])
5404 \f
5405
5406 ;; Operand numbers for the following peephole2
5407 (define_constants
5408   [(SFBOOL_TMP_GPR               0)             ;; GPR temporary
5409    (SFBOOL_TMP_VSX               1)             ;; vector temporary
5410    (SFBOOL_MFVSR_D               2)             ;; move to gpr dest
5411    (SFBOOL_MFVSR_A               3)             ;; move to gpr src
5412    (SFBOOL_BOOL_D                4)             ;; and/ior/xor dest
5413    (SFBOOL_BOOL_A1               5)             ;; and/ior/xor arg1
5414    (SFBOOL_BOOL_A2               6)             ;; and/ior/xor arg2
5415    (SFBOOL_SHL_D                 7)             ;; shift left dest
5416    (SFBOOL_SHL_A                 8)             ;; shift left arg
5417    (SFBOOL_MTVSR_D               9)             ;; move to vector dest
5418    (SFBOOL_MFVSR_A_V4SF         10)             ;; SFBOOL_MFVSR_A as V4SFmode
5419    (SFBOOL_BOOL_A_DI            11)             ;; SFBOOL_BOOL_A1/A2 as DImode
5420    (SFBOOL_TMP_VSX_DI           12)             ;; SFBOOL_TMP_VSX as DImode
5421    (SFBOOL_MTVSR_D_V4SF         13)])           ;; SFBOOL_MTVSR_D as V4SFmode
5422
5423 ;; Attempt to optimize some common GLIBC operations using logical operations to
5424 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
5425 ;; after macro expansion that looks like:
5426 ;;
5427 ;;      typedef union {
5428 ;;        float value;
5429 ;;        uint32_t word;
5430 ;;      } ieee_float_shape_type;
5431 ;;
5432 ;;      float t1;
5433 ;;      int32_t is;
5434 ;;
5435 ;;      do {
5436 ;;        ieee_float_shape_type gf_u;
5437 ;;        gf_u.value = (t1);
5438 ;;        (is) = gf_u.word;
5439 ;;      } while (0);
5440 ;;
5441 ;;      do {
5442 ;;        ieee_float_shape_type sf_u;
5443 ;;        sf_u.word = (is & 0xfffff000);
5444 ;;        (t1) = sf_u.value;
5445 ;;      } while (0);
5446 ;;
5447 ;;
5448 ;; This would result in two direct move operations (convert to memory format,
5449 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5450 ;; scalar format).  With this peephole, we eliminate the direct move to the
5451 ;; GPR, and instead move the integer mask value to the vector register after a
5452 ;; shift and do the VSX logical operation.
5453
5454 ;; The insns for dealing with SFmode in GPR registers looks like:
5455 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5456 ;;
5457 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5458 ;;
5459 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5460 ;;
5461 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5462 ;;
5463 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5464 ;;
5465 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5466
5467 (define_peephole2
     ;; Peephole that matches a four-insn sequence -- move a 32-bit value from
     ;; a VSX register to an integer register (zero extended), AND/IOR/XOR it
     ;; there, shift the result left by 32, and move it back to a VSX register
     ;; -- and replaces it with a sequence that performs the logical operation
     ;; in V4SFmode on the VSX side instead.  The SFBOOL_* operand numbers are
     ;; defined outside this pattern.
     ;;
     ;; Two scratch registers are requested up front: a DImode one with
     ;; constraint "r" and a V4SFmode one with constraint "wa".
5468   [(match_scratch:DI SFBOOL_TMP_GPR "r")
5469    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5470
5471    ;; MFVSRWZ (aka zero_extend)
5472    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5473         (zero_extend:DI
5474          (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5475
5476    ;; AND/IOR/XOR operation on int
5477    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5478         (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5479                         (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5480
5481    ;; SLDI
5482    (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5483         (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5484                    (const_int 32)))
5485
5486    ;; MTVSRD
5487    (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5488         (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5489
     ;; Condition.  Besides requiring TARGET_POWERPC64 and TARGET_DIRECT_MOVE,
     ;; it checks that the four insns form a single dependency chain:
     ;;   - each register operand tested below is a plain REG (BOOL_A2 may
     ;;     alternatively be a CONST_INT);
     ;;   - MFVSR_D is one of the two inputs of the logical operation, and is
     ;;     either overwritten by that operation or satisfies
     ;;     peep2_reg_dead_p (2, ...);
     ;;   - the logical result BOOL_D is the shift input SHL_A, and is either
     ;;     overwritten by the shift or satisfies peep2_reg_dead_p (3, ...);
     ;;   - the shift result SHL_D satisfies peep2_reg_dead_p (4, ...).
     ;; Presumably these liveness tests guarantee that none of the integer
     ;; intermediates is needed after the sequence -- confirm the exact offset
     ;; semantics of peep2_reg_dead_p before changing any of the numbers.
     ;; NOTE(review): SFBOOL_MFVSR_A is not REG_P-checked here although REGNO
     ;; is applied to it in the preparation code below -- confirm it can never
     ;; be a SUBREG when this peephole runs.
5490   "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5491    /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
5492       to compare registers, when the mode is different.  */
5493    && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5494    && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5495    && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
5496    && (REG_P (operands[SFBOOL_BOOL_A2])
5497        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5498    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5499        || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5500    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5501        || (REG_P (operands[SFBOOL_BOOL_A2])
5502            && REGNO (operands[SFBOOL_MFVSR_D])
5503                 == REGNO (operands[SFBOOL_BOOL_A2])))
5504    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5505    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5506        || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5507    && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
     ;; Replacement sequence.
     ;; Step 1: shift the other logical operand (SFBOOL_BOOL_A_DI, chosen by
     ;; the preparation code) left by 32 into the scratch integer register.
5508   [(set (match_dup SFBOOL_TMP_GPR)
5509         (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5510                    (const_int 32)))
5511
     ;; Step 2: copy the shifted value into the scratch VSX register, which is
     ;; referenced here in DImode.
5512    (set (match_dup SFBOOL_TMP_VSX_DI)
5513         (match_dup SFBOOL_TMP_GPR))
5514
     ;; Step 3: apply the same logical code in V4SFmode to the original VSX
     ;; source register and the scratch VSX register, writing the register of
     ;; the original MTVSRD destination.
5515    (set (match_dup SFBOOL_MTVSR_D_V4SF)
5516         (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5517                           (match_dup SFBOOL_TMP_VSX)))]
5518 {
       /* Preparation code: fill in the extra operands that the replacement
          template refers to but the matched insns do not provide.  */
5519   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5520   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5521   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5522   int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5523   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5524   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5525
       /* Pick the logical operand that is not the value moved out of the VSX
          register; that operand is the one to be shifted in step 1.  */
5526   if (CONST_INT_P (bool_a2))
5527     {
           /* A constant operand is first moved into the scratch integer
              register, which then also serves as the shift source.  */
5528       rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5529       emit_move_insn (tmp_gpr, bool_a2);
5530       operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5531     }
5532   else
5533     {
           /* Both operands are registers: use whichever one is not MFVSR_D,
              referenced as a DImode register.  */
5534       int regno_bool_a1 = REGNO (bool_a1);
5535       int regno_bool_a2 = REGNO (bool_a2);
5536       int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5537                           ? regno_bool_a2 : regno_bool_a1);
5538       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5539     }
5540
       /* Build REG rtxes with the same register numbers as the matched VSX
          operands and the VSX scratch, but in the modes the replacement
          template uses.  */
5541   operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5542   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5543   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5544 })