gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Iterator for comparison types
  22 (define_code_iterator CMP_TEST [eq lt gt unordered])
  23
  24 ;; Mode attribute for vector floate and floato conversions
  25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
  26
  27 ;; Iterator for both scalar and vector floating point types supported by VSX
  28 (define_mode_iterator VSX_B [DF V4SF V2DF])
  29
  30 ;; Iterator for the 2 64-bit vector types
  31 (define_mode_iterator VSX_D [V2DF V2DI])
  32
  33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
  34 ;; types that go in a single vector register.
  35 (define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
  36                                   (TF   "FLOAT128_VECTOR_P (TFmode)")
  37                                   TI
  38                                   V1TI])
  39
  40 ;; Iterator for 128-bit integer types that go in a single vector register.
  41 (define_mode_iterator VSX_TI [TI V1TI])
  42
  43 ;; Iterator for the 2 32-bit vector types
  44 (define_mode_iterator VSX_W [V4SF V4SI])
  45
  46 ;; Iterator for the DF types
  47 (define_mode_iterator VSX_DF [V2DF DF])
  48
  49 ;; Iterator for vector floating point types supported by VSX
  50 (define_mode_iterator VSX_F [V4SF V2DF])
  51
  52 ;; Iterator for logical types supported by VSX
  53 (define_mode_iterator VSX_L [V16QI
  54                              V8HI
  55                              V4SI
  56                              V2DI
  57                              V4SF
  58                              V2DF
  59                              V1TI
  60                              TI
  61                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  62                              (TF        "FLOAT128_VECTOR_P (TFmode)")])
  63
  64 ;; Iterator for memory moves.
  65 (define_mode_iterator VSX_M [V16QI
  66                              V8HI
  67                              V4SI
  68                              V2DI
  69                              V4SF
  70                              V2DF
  71                              V1TI
  72                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  73                              (TF        "FLOAT128_VECTOR_P (TFmode)")
  74                              TI])
  75
  76 (define_mode_attr VSX_XXBR  [(V8HI  "h")   ; per-mode letter h/w/d/q (presumably the xxbr<x> suffix; confirm at uses)
  77                              (V4SI  "w")
  78                              (V4SF  "w")
  79                              (V2DF  "d")
  80                              (V2DI  "d")
  81                              (V1TI  "q")])
  82
  83 ;; Map into the appropriate load/store name based on the type
  84 (define_mode_attr VSm  [(V16QI "vw4")
  85                         (V8HI  "vw4")
  86                         (V4SI  "vw4")
  87                         (V4SF  "vw4")
  88                         (V2DF  "vd2")
  89                         (V2DI  "vd2")
  90                         (DF    "d")
  91                         (TF    "vd2")
  92                         (KF    "vd2")
  93                         (V1TI  "vd2")
  94                         (TI    "vd2")])
  95
  96 ;; Map into the appropriate suffix based on the type
  97 (define_mode_attr VSs   [(V16QI "sp")
  98                          (V8HI  "sp")
  99                          (V4SI  "sp")
 100                          (V4SF  "sp")
 101                          (V2DF  "dp")
 102                          (V2DI  "dp")
 103                          (DF    "dp")
 104                          (SF    "sp")
 105                          (TF    "dp")
 106                          (KF    "dp")
 107                          (V1TI  "dp")
 108                          (TI    "dp")])
 109
 110 ;; Map the register class used
 111 (define_mode_attr VSr   [(V16QI "v")
 112                          (V8HI  "v")
 113                          (V4SI  "v")
 114                          (V4SF  "wf")
 115                          (V2DI  "wd")
 116                          (V2DF  "wd")
 117                          (DI    "wi")
 118                          (DF    "ws")
 119                          (SF    "ww")
 120                          (TF    "wp")
 121                          (KF    "wq")
 122                          (V1TI  "v")
 123                          (TI    "wt")])
 124
 125 ;; Map the register class used for float<->int conversions (floating point side)
 126 ;; VSr2 is the preferred register class, VSr3 is any register class that will
 127 ;; hold the data
 128 (define_mode_attr VSr2  [(V2DF  "wd")
 129                          (V4SF  "wf")
 130                          (DF    "ws")
 131                          (SF    "ww")
 132                          (DI    "wi")
 133                          (KF    "wq")
 134                          (TF    "wp")])
 135
 136 (define_mode_attr VSr3  [(V2DF  "wa")
 137                          (V4SF  "wa")
 138                          (DF    "ws")
 139                          (SF    "ww")
 140                          (DI    "wi")
 141                          (KF    "wq")
 142                          (TF    "wp")])
 143
 144 ;; Map the register class for sp<->dp float conversions, destination
 145 (define_mode_attr VSr4  [(SF    "ws")
 146                          (DF    "f")
 147                          (V2DF  "wd")
 148                          (V4SF  "v")])
 149
 150 ;; Map the register class for sp<->dp float conversions, source
 151 (define_mode_attr VSr5  [(SF    "ws")
 152                          (DF    "f")
 153                          (V2DF  "v")
 154                          (V4SF  "wd")])
 155
 156 ;; The VSX register class that a type can occupy, even if it is not the
 157 ;; preferred register class (VSr is the preferred register class that will get
 158 ;; allocated first).
 159 (define_mode_attr VSa   [(V16QI "wa")
 160                          (V8HI  "wa")
 161                          (V4SI  "wa")
 162                          (V4SF  "wa")
 163                          (V2DI  "wa")
 164                          (V2DF  "wa")
 165                          (DI    "wi")
 166                          (DF    "ws")
 167                          (SF    "ww")
 168                          (V1TI  "wa")
 169                          (TI    "wt")
 170                          (TF    "wp")
 171                          (KF    "wq")])
 172
 173 ;; Same size integer type for floating point data
 174 (define_mode_attr VSi [(V4SF  "v4si")
 175                        (V2DF  "v2di")
 176                        (DF    "di")])
 177
 178 (define_mode_attr VSI [(V4SF  "V4SI")   ; upper-case (mode name) form of the VSi mapping
 179                        (V2DF  "V2DI")
 180                        (DF    "DI")])
 181
 182 ;; Word size for same size conversion
 183 (define_mode_attr VSc [(V4SF "w")
 184                        (V2DF "d")
 185                        (DF   "d")])
 186
 187 ;; Map into either s or v, depending on whether this is a scalar or vector
 188 ;; operation
 189 (define_mode_attr VSv   [(V16QI "v")
 190                          (V8HI  "v")
 191                          (V4SI  "v")
 192                          (V4SF  "v")
 193                          (V2DI  "v")
 194                          (V2DF  "v")
 195                          (V1TI  "v")
 196                          (DF    "s")
 197                          (KF    "v")])
 198
 199 ;; Appropriate type for add ops (and other simple FP ops)
 200 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 201                                  (V4SF "vecfloat")
 202                                  (DF   "fp")])
 203
 204 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 205                                    (V4SF "fp_addsub_s")
 206                                    (DF   "fp_addsub_d")])
 207
 208 ;; Appropriate type for multiply ops
 209 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 210                                  (V4SF "vecfloat")
 211                                  (DF   "dmul")])
 212
 213 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 214                                  (V4SF "fp_mul_s")
 215                                  (DF   "fp_mul_d")])
 216
 217 ;; Appropriate type for divide ops.
 218 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 219                                  (V4SF "vecfdiv")
 220                                  (DF   "ddiv")])
 221
 222 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 223                                  (V4SF "fp_div_s")
 224                                  (DF   "fp_div_d")])
 225
 226 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 227 ;; the scalar sqrt
 228 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 229                                  (V4SF "ssqrt")
 230                                  (DF   "dsqrt")])
 231
 232 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 233                                  (V4SF "fp_sqrt_s")
 234                                  (DF   "fp_sqrt_d")])
 235
 236 ;; Iterator and modes for sp<->dp conversions
 237 ;; Because scalar SF values are represented internally as double, use the
 238 ;; V4SF type rather than SF to represent single precision here.
 239 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 240
 241 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 242                                (V4SF    "V2DF")
 243                                (V2DF    "V4SF")])
 244
 245 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 246                                 (V4SF   "xvcvspdp")
 247                                 (V2DF   "xvcvdpsp")])
 248
 249 (define_mode_attr VS_spdp_type [(DF     "fp")
 250                                 (V4SF   "vecdouble")
 251                                 (V2DF   "vecdouble")])
 252
 253 ;; Map the scalar mode for a vector type
 254 (define_mode_attr VS_scalar [(V1TI      "TI")
 255                              (V2DF      "DF")
 256                              (V2DI      "DI")
 257                              (V4SF      "SF")
 258                              (V4SI      "SI")
 259                              (V8HI      "HI")
 260                              (V16QI     "QI")])
 261
 262 ;; Map to a double-sized vector mode
 263 (define_mode_attr VS_double [(V4SI      "V8SI")
 264                              (V4SF      "V8SF")
 265                              (V2DI      "V4DI")
 266                              (V2DF      "V4DF")
 267                              (V1TI      "V2TI")])
 268
 269 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 270 ;; to/from gprs
 271 (define_mode_attr VS_64dm [(V2DF        "wk")
 272                            (V2DI        "wj")])
 273
 274 ;; Map register class for 64-bit element in 128-bit vector for normal register
 275 ;; to register moves
 276 (define_mode_attr VS_64reg [(V2DF       "ws")
 277                             (V2DI       "wi")])
 278
 279 ;; Iterators for loading constants with xxspltib
 280 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 281 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 282
 283 ;; Vector reverse byte modes
 284 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
 285
 286 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
 287 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
 288 ;; done on ISA 2.07 and not just ISA 3.0.
 289 (define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
 290 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
 291
 292 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")   ; element-width letter (b, h or w) for each VSX_EXTRACT_I mode
 293                                      (V8HI "h")
 294                                      (V4SI "w")])
 295
 296 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
 297 ;; insert to validate the operand number.
 298 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
 299                                          (V8HI  "const_0_to_7_operand")
 300                                          (V4SI  "const_0_to_3_operand")])
 301
 302 ;; Mode attribute to give the constraint for vector extract and insert
 303 ;; operations.
 304 (define_mode_attr VSX_EX [(V16QI "v")
 305                           (V8HI  "v")
 306                           (V4SI  "wa")])
 307
 308 ;; Mode iterator for binary floating types other than double to
 309 ;; optimize convert to that floating point type from an extract
 310 ;; of an integer type
 311 (define_mode_iterator VSX_EXTRACT_FL [SF
 312                                       (IF "FLOAT128_2REG_P (IFmode)")
 313                                       (KF "TARGET_FLOAT128_HW")
 314                                       (TF "FLOAT128_2REG_P (TFmode)
 315                                            || (FLOAT128_IEEE_P (TFmode)
 316                                                && TARGET_FLOAT128_HW)")])
 317
 318 ;; Mode iterator for binary floating types that have a direct conversion
 319 ;; from 64-bit integer to floating point
 320 (define_mode_iterator FL_CONV [SF
 321                                DF
 322                                (KF "TARGET_FLOAT128_HW")
 323                                (TF "TARGET_FLOAT128_HW
 324                                     && FLOAT128_IEEE_P (TFmode)")])
 325
 326 ;; Iterator for the 2 short vector types to do a splat from an integer
 327 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 328
 329 ;; Mode attribute to give the count for the splat instruction to splat
 330 ;; the value in the 64-bit integer slot
 331 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
 332
 333 ;; Mode attribute to give the suffix for the splat instruction
 334 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
 335
 336 ;; Constants for creating unspecs
 337 (define_c_enum "unspec"
 338   [UNSPEC_VSX_CONCAT
 339    UNSPEC_VSX_CVDPSXWS
 340    UNSPEC_VSX_CVDPUXWS
 341    UNSPEC_VSX_CVSPDP
 342    UNSPEC_VSX_CVHPSP
 343    UNSPEC_VSX_CVSPDPN
 344    UNSPEC_VSX_CVDPSPN
 345    UNSPEC_VSX_CVSXWDP
 346    UNSPEC_VSX_CVUXWDP
 347    UNSPEC_VSX_CVSXDSP
 348    UNSPEC_VSX_CVUXDSP
 349    UNSPEC_VSX_CVSPSXDS
 350    UNSPEC_VSX_CVSPUXDS
 351    UNSPEC_VSX_CVSXWSP
 352    UNSPEC_VSX_CVUXWSP
 353    UNSPEC_VSX_FLOAT2
 354    UNSPEC_VSX_UNS_FLOAT2
 355    UNSPEC_VSX_FLOATE
 356    UNSPEC_VSX_UNS_FLOATE
 357    UNSPEC_VSX_FLOATO
 358    UNSPEC_VSX_UNS_FLOATO
 359    UNSPEC_VSX_TDIV
 360    UNSPEC_VSX_TSQRT
 361    UNSPEC_VSX_SET
 362    UNSPEC_VSX_ROUND_I
 363    UNSPEC_VSX_ROUND_IC
 364    UNSPEC_VSX_SLDWI
 365    UNSPEC_VSX_XXPERM
 366
 367    UNSPEC_VSX_XXSPLTW
 368    UNSPEC_VSX_XXSPLTD
 369    UNSPEC_VSX_DIVSD
 370    UNSPEC_VSX_DIVUD
 371    UNSPEC_VSX_MULSD
 372    UNSPEC_VSX_XVCVSXDDP
 373    UNSPEC_VSX_XVCVUXDDP
 374    UNSPEC_VSX_XVCVDPSXDS
 375    UNSPEC_VSX_XVCDPSP
 376    UNSPEC_VSX_XVCVDPUXDS
 377    UNSPEC_VSX_SIGN_EXTEND
 378    UNSPEC_VSX_XVCVSPSXWS
 379    UNSPEC_VSX_XVCVSPSXDS
 380    UNSPEC_VSX_VSLO
 381    UNSPEC_VSX_EXTRACT
 382    UNSPEC_VSX_SXEXPDP
 383    UNSPEC_VSX_SXSIG
 384    UNSPEC_VSX_SIEXPDP
 385    UNSPEC_VSX_SIEXPQP
 386    UNSPEC_VSX_SCMPEXPDP
 387    UNSPEC_VSX_STSTDC
 388    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
 389    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
 390    UNSPEC_VSX_VXEXP
 391    UNSPEC_VSX_VXSIG
 392    UNSPEC_VSX_VIEXP
 393    UNSPEC_VSX_VTSTDC
 394    UNSPEC_VSX_VEC_INIT
 395    UNSPEC_VSX_VSIGNED2
 396
 397    UNSPEC_LXVL
 398    UNSPEC_LXVLL
 399    UNSPEC_LVSL_REG
 400    UNSPEC_LVSR_REG
 401    UNSPEC_STXVL
 402    UNSPEC_STXVLL
 403    UNSPEC_XL_LEN_R
 404    UNSPEC_XST_LEN_R
 405
 406    UNSPEC_VCLZLSBB
 407    UNSPEC_VCTZLSBB
 408    UNSPEC_VEXTUBLX
 409    UNSPEC_VEXTUHLX
 410    UNSPEC_VEXTUWLX
 411    UNSPEC_VEXTUBRX
 412    UNSPEC_VEXTUHRX
 413    UNSPEC_VEXTUWRX
 414    UNSPEC_VCMPNEB
 415    UNSPEC_VCMPNEZB
 416    UNSPEC_VCMPNEH
 417    UNSPEC_VCMPNEZH
 418    UNSPEC_VCMPNEW
 419    UNSPEC_VCMPNEZW
 420    UNSPEC_XXEXTRACTUW
 421    UNSPEC_XXINSERTW
 422    UNSPEC_VSX_FIRST_MATCH_INDEX
 423    UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
 424    UNSPEC_VSX_FIRST_MISMATCH_INDEX
 425    UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
 426   ])
 427
 428 ;; VSX moves
 429
 430 ;; The patterns for LE permuted loads and stores come before the general
 431 ;; VSX moves so they match first.
 432 (define_insn_and_split "*vsx_le_perm_load_<mode>"   ; VSX_D (V2DF/V2DI) variant
 433   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
 434         (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
 435   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 436   "#"
 437   "&& 1"
 438   [(set (match_dup 2)
 439         (vec_select:<MODE>
 440           (match_dup 1)
 441           (parallel [(const_int 1) (const_int 0)])))
 442    (set (match_dup 0)
 443         (vec_select:<MODE>
 444           (match_dup 2)
 445           (parallel [(const_int 1) (const_int 0)])))]
 446   "
 447 {
 448   rtx mem = operands[1];
 449
 450   /* Only try the lvx form when the memory is known to be 128-bit aligned
 451      and the destination is either still a pseudo or a hard register in
 452      the altivec range.  */
 453   if ((MEM_ALIGN (mem) >= 128)
 454       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 455           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 456     {
 457       rtx mem_address = XEXP (mem, 0);
 458       enum machine_mode mode = GET_MODE (mem);
 459
 460       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 461         {
 462           /* Emit the rs6000_gen_lvx load (masked address), not the swaps.  */
 463           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 464           emit_insn (lvx_set_expr);
 465           DONE;
 466         }
 467       else if (rs6000_quadword_masked_address_p (mem_address))
 468         {
 469           /* This rtl is already in the form that matches lvx
 470              instruction, so leave it alone.  */
 471           DONE;
 472         }
 473       /* Otherwise, fall through to transform into a swapping load.  */
 474     }
 475   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 476                                        : operands[0];
 477 }
 478   "
 479   [(set_attr "type" "vecload")
 480    (set_attr "length" "8")])
 481
 482 (define_insn_and_split "*vsx_le_perm_load_<mode>"   ; VSX_W (V4SF/V4SI) variant
 483   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 484         (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
 485   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 486   "#"
 487   "&& 1"
 488   [(set (match_dup 2)
 489         (vec_select:<MODE>
 490           (match_dup 1)
 491           (parallel [(const_int 2) (const_int 3)
 492                      (const_int 0) (const_int 1)])))
 493    (set (match_dup 0)
 494         (vec_select:<MODE>
 495           (match_dup 2)
 496           (parallel [(const_int 2) (const_int 3)
 497                      (const_int 0) (const_int 1)])))]
 498   "
 499 {
 500   rtx mem = operands[1];
 501
 502   /* Don't apply the swap optimization if we've already performed register
 503      allocation and the hard register destination is not in the altivec
 504      range.  reg_or_subregno also copes with a SUBREG destination.  */
 505   if ((MEM_ALIGN (mem) >= 128)
 506       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 507           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 508     {
 509       rtx mem_address = XEXP (mem, 0);
 510       enum machine_mode mode = GET_MODE (mem);
 511
 512       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 513         {
 514           /* Replace the source memory address with masked address.  */
 515           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 516           emit_insn (lvx_set_expr);
 517           DONE;
 518         }
 519       else if (rs6000_quadword_masked_address_p (mem_address))
 520         {
 521           /* This rtl is already in the form that matches lvx
 522              instruction, so leave it alone.  */
 523           DONE;
 524         }
 525       /* Otherwise, fall through to transform into a swapping load.  */
 526     }
 527   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 528                                        : operands[0];
 529 }
 530   "
 531   [(set_attr "type" "vecload")
 532    (set_attr "length" "8")])
 533
 534 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 535   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 536         (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
 537   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 538   "#"
 539   "&& 1"
 540   [(set (match_dup 2)
 541         (vec_select:V8HI
 542           (match_dup 1)
 543           (parallel [(const_int 4) (const_int 5)
 544                      (const_int 6) (const_int 7)
 545                      (const_int 0) (const_int 1)
 546                      (const_int 2) (const_int 3)])))
 547    (set (match_dup 0)
 548         (vec_select:V8HI
 549           (match_dup 2)
 550           (parallel [(const_int 4) (const_int 5)
 551                      (const_int 6) (const_int 7)
 552                      (const_int 0) (const_int 1)
 553                      (const_int 2) (const_int 3)])))]
 554   "
 555 {
 556   rtx mem = operands[1];
 557
 558   /* Don't apply the swap optimization if we've already performed register
 559      allocation and the hard register destination is not in the altivec
 560      range.  reg_or_subregno also copes with a SUBREG destination.  */
 561   if ((MEM_ALIGN (mem) >= 128)
 562       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 563           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 564     {
 565       rtx mem_address = XEXP (mem, 0);
 566       enum machine_mode mode = GET_MODE (mem);
 567
 568       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 569         {
 570           /* Replace the source memory address with masked address.  */
 571           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 572           emit_insn (lvx_set_expr);
 573           DONE;
 574         }
 575       else if (rs6000_quadword_masked_address_p (mem_address))
 576         {
 577           /* This rtl is already in the form that matches lvx
 578              instruction, so leave it alone.  */
 579           DONE;
 580         }
 581       /* Otherwise, fall through to transform into a swapping load.  */
 582     }
 583   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 584                                        : operands[0];
 585 }
 586   "
 587   [(set_attr "type" "vecload")
 588    (set_attr "length" "8")])
 589
 590 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 591   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 592         (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
 593   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 594   "#"
 595   "&& 1"
 596   [(set (match_dup 2)
 597         (vec_select:V16QI
 598           (match_dup 1)
 599           (parallel [(const_int 8) (const_int 9)
 600                      (const_int 10) (const_int 11)
 601                      (const_int 12) (const_int 13)
 602                      (const_int 14) (const_int 15)
 603                      (const_int 0) (const_int 1)
 604                      (const_int 2) (const_int 3)
 605                      (const_int 4) (const_int 5)
 606                      (const_int 6) (const_int 7)])))
 607    (set (match_dup 0)
 608         (vec_select:V16QI
 609           (match_dup 2)
 610           (parallel [(const_int 8) (const_int 9)
 611                      (const_int 10) (const_int 11)
 612                      (const_int 12) (const_int 13)
 613                      (const_int 14) (const_int 15)
 614                      (const_int 0) (const_int 1)
 615                      (const_int 2) (const_int 3)
 616                      (const_int 4) (const_int 5)
 617                      (const_int 6) (const_int 7)])))]
 618   "
 619 {
 620   rtx mem = operands[1];
 621
 622   /* Don't apply the swap optimization if we've already performed register
 623      allocation and the hard register destination is not in the altivec
 624      range.  reg_or_subregno also copes with a SUBREG destination.  */
 625   if ((MEM_ALIGN (mem) >= 128)
 626       && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
 627           || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
 628     {
 629       rtx mem_address = XEXP (mem, 0);
 630       enum machine_mode mode = GET_MODE (mem);
 631
 632       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 633         {
 634           /* Replace the source memory address with masked address.  */
 635           rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
 636           emit_insn (lvx_set_expr);
 637           DONE;
 638         }
 639       else if (rs6000_quadword_masked_address_p (mem_address))
 640         {
 641           /* This rtl is already in the form that matches lvx
 642              instruction, so leave it alone.  */
 643           DONE;
 644         }
 645       /* Otherwise, fall through to transform into a swapping load.  */
 646     }
 647   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 648                                        : operands[0];
 649 }
 650   "
 651   [(set_attr "type" "vecload")
 652    (set_attr "length" "8")])
 653
 654 (define_insn "*vsx_le_perm_store_<mode>"   ; VSX_D (V2DF/V2DI) variant
 655   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
 656         (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]   ; "+": the post-reload split permutes operand 1 in place
 657   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 658   "#"   ; never output directly; one of the define_splits that follow rewrites it
 659   [(set_attr "type" "vecstore")
 660    (set_attr "length" "12")])   ; presumably sized for the three-insn post-reload split
 661
 662 (define_split   ; pre-reload VSX_D store: swap into operand 2, then do a swapping store
 663   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "")
 664         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 665   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 666   [(set (match_dup 2)
 667         (vec_select:<MODE>
 668           (match_dup 1)
 669           (parallel [(const_int 1) (const_int 0)])))
 670    (set (match_dup 0)
 671         (vec_select:<MODE>
 672           (match_dup 2)
 673           (parallel [(const_int 1) (const_int 0)])))]
 674 {
 675   rtx mem = operands[0];
 676
 677   /* Only try the stvx form when the memory is 128-bit aligned and the source
 678      is either still a pseudo or a hard register in the altivec range.  */
 679   if ((MEM_ALIGN (mem) >= 128)
 680       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 681           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 682     {
 683       rtx mem_address = XEXP (mem, 0);
 684       enum machine_mode mode = GET_MODE (mem);
 685       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 686         {
 687           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 688           emit_insn (stvx_set_expr);
 689           DONE;
 690         }
 691       else if (rs6000_quadword_masked_address_p (mem_address))
 692         {
 693           /* This rtl is already in the form that matches stvx instruction,
 694              so leave it alone.  */
 695           DONE;
 696         }
 697       /* Otherwise, fall through to transform into a swapping store.  */
 698     }
 699
 700   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 701                                        : operands[1];
 702 })
 703
 704 ;; Post-reload VSX_D store split: no temporary is used, so swap the source
 705 ;; in place, store it, then swap it back in case it is still live.
 706 (define_split
 707   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "")
 708         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 709   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 710   [(set (match_dup 1)
 711         (vec_select:<MODE>
 712           (match_dup 1)
 713           (parallel [(const_int 1) (const_int 0)])))
 714    (set (match_dup 0)
 715         (vec_select:<MODE>
 716           (match_dup 1)
 717           (parallel [(const_int 1) (const_int 0)])))
 718    (set (match_dup 1)
 719         (vec_select:<MODE>
 720           (match_dup 1)
 721           (parallel [(const_int 1) (const_int 0)])))]
 722   "")
 723
 724 (define_insn "*vsx_le_perm_store_<mode>"   ; VSX_W (V4SF/V4SI) variant
 725   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
 726         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]   ; "+": the post-reload split permutes operand 1 in place
 727   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 728   "#"   ; never output directly; one of the define_splits that follow rewrites it
 729   [(set_attr "type" "vecstore")
 730    (set_attr "length" "12")])   ; presumably sized for the three-insn post-reload split
 731
 732 (define_split   ; pre-reload VSX_W store: swap into operand 2, then do a swapping store
 733   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "")
 734         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 735   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 736   [(set (match_dup 2)
 737         (vec_select:<MODE>
 738           (match_dup 1)
 739           (parallel [(const_int 2) (const_int 3)
 740                      (const_int 0) (const_int 1)])))
 741    (set (match_dup 0)
 742         (vec_select:<MODE>
 743           (match_dup 2)
 744           (parallel [(const_int 2) (const_int 3)
 745                      (const_int 0) (const_int 1)])))]
 746 {
 747   rtx mem = operands[0];
 748
 749   /* Only try the stvx form when the memory is 128-bit aligned and the source
 750      is either still a pseudo or a hard register in the altivec range.  */
 751   if ((MEM_ALIGN (mem) >= 128)
 752       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 753           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 754     {
 755       rtx mem_address = XEXP (mem, 0);
 756       enum machine_mode mode = GET_MODE (mem);
 757       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 758         {
 759           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 760           emit_insn (stvx_set_expr);
 761           DONE;
 762         }
 763       else if (rs6000_quadword_masked_address_p (mem_address))
 764         {
 765           /* This rtl is already in the form that matches stvx instruction,
 766              so leave it alone.  */
 767           DONE;
 768         }
 769       /* Otherwise, fall through to transform into a swapping store.  */
 770     }
 771
 772   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 773                                        : operands[1];
 774 })
 775
 776 ;; Post-reload VSX_W store split: no temporary is used, so swap the source
 777 ;; in place, store it, then swap it back in case it is still live.
 778 (define_split
 779   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "")
 780         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 781   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 782   [(set (match_dup 1)
 783         (vec_select:<MODE>
 784           (match_dup 1)
 785           (parallel [(const_int 2) (const_int 3)
 786                      (const_int 0) (const_int 1)])))
 787    (set (match_dup 0)
 788         (vec_select:<MODE>
 789           (match_dup 1)
 790           (parallel [(const_int 2) (const_int 3)
 791                      (const_int 0) (const_int 1)])))
 792    (set (match_dup 1)
 793         (vec_select:<MODE>
 794           (match_dup 1)
 795           (parallel [(const_int 2) (const_int 3)
 796                      (const_int 0) (const_int 1)])))]
 797   "")
 798
 799 (define_insn "*vsx_le_perm_store_v8hi"
       ;; V8HI register-to-memory store for little-endian VSX targets without
       ;; TARGET_P9_VECTOR.  The output template is "#", so the insn is never
       ;; printed as is; it has to be split by the V8HI define_splits that
       ;; follow.  Operand 1 is "+wa" (read/write): the post-reload split
       ;; permutes the source register in place before restoring it.
 800   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
 801         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 802   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 803   "#"
 804   [(set_attr "type" "vecstore")
 805    (set_attr "length" "12")])
 806
 807 (define_split
       ;; Pre-reload split of a V8HI store (little-endian VSX, no
       ;; TARGET_P9_VECTOR).  By default the store becomes two sets: a half
       ;; swap of operand 1 into operand 2 (element order 4..7,0..3), then a
       ;; store of operand 2 to operand 0 through the same half swap.  The C
       ;; fragment can instead emit a single store built by rs6000_gen_stvx,
       ;; or emit nothing at all, when the destination is sufficiently aligned.
 808   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "")
 809         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 810   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 811   [(set (match_dup 2)
 812         (vec_select:V8HI
 813           (match_dup 1)
 814           (parallel [(const_int 4) (const_int 5)
 815                      (const_int 6) (const_int 7)
 816                      (const_int 0) (const_int 1)
 817                      (const_int 2) (const_int 3)])))
 818    (set (match_dup 0)
 819         (vec_select:V8HI
 820           (match_dup 2)
 821           (parallel [(const_int 4) (const_int 5)
 822                      (const_int 6) (const_int 7)
 823                      (const_int 0) (const_int 1)
 824                      (const_int 2) (const_int 3)])))]
 825 {
 826   rtx mem = operands[0];
 827
 828   /* Don't apply the swap optimization if we've already performed register
 829      allocation and the hard register source is not in the altivec range.  */
       /* The shortcut is considered only when MEM_ALIGN (mem) is at least 128
          and the source is either a pseudo or an Altivec hard register.  */
 830   if ((MEM_ALIGN (mem) >= 128)
 831       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 832           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 833     {
 834       rtx mem_address = XEXP (mem, 0);
 835       enum machine_mode mode = GET_MODE (mem);
           /* Plain register or register+register address: emit the store
              produced by rs6000_gen_stvx and finish the split.  */
 836       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 837         {
 838           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 839           emit_insn (stvx_set_expr);
 840           DONE;
 841         }
 842       else if (rs6000_quadword_masked_address_p (mem_address))
 843         {
 844           /* This rtl is already in the form that matches stvx instruction,
 845              so leave it alone.  */
 846           DONE;
 847         }
 848       /* Otherwise, fall through to transform into a swapping store.  */
 849     }
 850
       /* Operand 2 holds the swapped value: a fresh pseudo (with the attributes
          of operand 1) when pseudos can still be created, otherwise operand 1
          itself is reused and overwritten.  */
 851   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 852                                        : operands[1];
 853 })
 854
 855 ;; The post-reload split requires that we re-permute the source
 856 ;; register in case it is still live.
     ;; V8HI variant, working in place on operand 1: swap its two halves
     ;; (element order 4..7,0..3), store it to operand 0 through the same swap,
     ;; then swap operand 1 again so that it holds its original value.
 857 (define_split
 858   [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "")
 859         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 860   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 861   [(set (match_dup 1)
 862         (vec_select:V8HI
 863           (match_dup 1)
 864           (parallel [(const_int 4) (const_int 5)
 865                      (const_int 6) (const_int 7)
 866                      (const_int 0) (const_int 1)
 867                      (const_int 2) (const_int 3)])))
 868    (set (match_dup 0)
 869         (vec_select:V8HI
 870           (match_dup 1)
 871           (parallel [(const_int 4) (const_int 5)
 872                      (const_int 6) (const_int 7)
 873                      (const_int 0) (const_int 1)
 874                      (const_int 2) (const_int 3)])))
 875    (set (match_dup 1)
 876         (vec_select:V8HI
 877           (match_dup 1)
 878           (parallel [(const_int 4) (const_int 5)
 879                      (const_int 6) (const_int 7)
 880                      (const_int 0) (const_int 1)
 881                      (const_int 2) (const_int 3)])))]
 882   "")
 883
 884 (define_insn "*vsx_le_perm_store_v16qi"
       ;; V16QI register-to-memory store for little-endian VSX targets without
       ;; TARGET_P9_VECTOR.  The output template is "#", so the insn is never
       ;; printed as is; it has to be split by the V16QI define_splits that
       ;; follow.  Operand 1 is "+wa" (read/write): the post-reload split
       ;; permutes the source register in place before restoring it.
 885   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
 886         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 887   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 888   "#"
 889   [(set_attr "type" "vecstore")
 890    (set_attr "length" "12")])
 891
 892 (define_split
       ;; Pre-reload split of a V16QI store (little-endian VSX, no
       ;; TARGET_P9_VECTOR).  By default the store becomes two sets: a half
       ;; swap of operand 1 into operand 2 (element order 8..15,0..7), then a
       ;; store of operand 2 to operand 0 through the same half swap.  The C
       ;; fragment can instead emit a single store built by rs6000_gen_stvx,
       ;; or emit nothing at all, when the destination is sufficiently aligned.
 893   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "")
 894         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 895   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 896   [(set (match_dup 2)
 897         (vec_select:V16QI
 898           (match_dup 1)
 899           (parallel [(const_int 8) (const_int 9)
 900                      (const_int 10) (const_int 11)
 901                      (const_int 12) (const_int 13)
 902                      (const_int 14) (const_int 15)
 903                      (const_int 0) (const_int 1)
 904                      (const_int 2) (const_int 3)
 905                      (const_int 4) (const_int 5)
 906                      (const_int 6) (const_int 7)])))
 907    (set (match_dup 0)
 908         (vec_select:V16QI
 909           (match_dup 2)
 910           (parallel [(const_int 8) (const_int 9)
 911                      (const_int 10) (const_int 11)
 912                      (const_int 12) (const_int 13)
 913                      (const_int 14) (const_int 15)
 914                      (const_int 0) (const_int 1)
 915                      (const_int 2) (const_int 3)
 916                      (const_int 4) (const_int 5)
 917                      (const_int 6) (const_int 7)])))]
 918 {
 919   rtx mem = operands[0];
 920
 921   /* Don't apply the swap optimization if we've already performed register
 922      allocation and the hard register source is not in the altivec range.  */
       /* The shortcut is considered only when MEM_ALIGN (mem) is at least 128
          and the source is either a pseudo or an Altivec hard register.  */
 923   if ((MEM_ALIGN (mem) >= 128)
 924       && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
 925           || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
 926     {
 927       rtx mem_address = XEXP (mem, 0);
 928       enum machine_mode mode = GET_MODE (mem);
           /* Plain register or register+register address: emit the store
              produced by rs6000_gen_stvx and finish the split.  */
 929       if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
 930         {
 931           rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
 932           emit_insn (stvx_set_expr);
 933           DONE;
 934         }
 935       else if (rs6000_quadword_masked_address_p (mem_address))
 936         {
 937           /* This rtl is already in the form that matches stvx instruction,
 938              so leave it alone.  */
 939           DONE;
 940         }
 941       /* Otherwise, fall through to transform into a swapping store.  */
 942     }
 943
       /* Operand 2 holds the swapped value: a fresh pseudo (with the attributes
          of operand 1) when pseudos can still be created, otherwise operand 1
          itself is reused and overwritten.  */
 944   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 945                                        : operands[1];
 946 })
 947
 948 ;; The post-reload split requires that we re-permute the source
 949 ;; register in case it is still live.
     ;; V16QI variant, working in place on operand 1: swap its two halves
     ;; (element order 8..15,0..7), store it to operand 0 through the same swap,
     ;; then swap operand 1 again so that it holds its original value.
 950 (define_split
 951   [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "")
 952         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 953   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 954   [(set (match_dup 1)
 955         (vec_select:V16QI
 956           (match_dup 1)
 957           (parallel [(const_int 8) (const_int 9)
 958                      (const_int 10) (const_int 11)
 959                      (const_int 12) (const_int 13)
 960                      (const_int 14) (const_int 15)
 961                      (const_int 0) (const_int 1)
 962                      (const_int 2) (const_int 3)
 963                      (const_int 4) (const_int 5)
 964                      (const_int 6) (const_int 7)])))
 965    (set (match_dup 0)
 966         (vec_select:V16QI
 967           (match_dup 1)
 968           (parallel [(const_int 8) (const_int 9)
 969                      (const_int 10) (const_int 11)
 970                      (const_int 12) (const_int 13)
 971                      (const_int 14) (const_int 15)
 972                      (const_int 0) (const_int 1)
 973                      (const_int 2) (const_int 3)
 974                      (const_int 4) (const_int 5)
 975                      (const_int 6) (const_int 7)])))
 976    (set (match_dup 1)
 977         (vec_select:V16QI
 978           (match_dup 1)
 979           (parallel [(const_int 8) (const_int 9)
 980                      (const_int 10) (const_int 11)
 981                      (const_int 12) (const_int 13)
 982                      (const_int 14) (const_int 15)
 983                      (const_int 0) (const_int 1)
 984                      (const_int 2) (const_int 3)
 985                      (const_int 4) (const_int 5)
 986                      (const_int 6) (const_int 7)])))]
 987   "")
 988
 989 ;; Little endian word swapping for 128-bit types that are either scalars or the
 990 ;; special V1TI container class, for which it is not appropriate to use
 991 ;; vec_select.
     ;; The swap is written as a rotate by 64 bits.  The six alternatives, in
     ;; order, are: register <- register (xxpermdi), register <- memory (lxvd2x),
     ;; memory <- register (stxvd2x), and the "r"-constraint forms
     ;; register <- register (two mr), register <- memory (two ld) and
     ;; memory <- register (two std).  The "r" forms take 8 bytes, the others 4.
 992 (define_insn "*vsx_le_permute_<mode>"
 993   [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
 994         (rotate:VSX_TI
 995          (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
 996          (const_int 64)))]
 997   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 998   "@
 999    xxpermdi %x0,%x1,%x1,2
1000    lxvd2x %x0,%y1
1001    stxvd2x %x1,%y0
1002    mr %0,%L1\;mr %L0,%1
1003    ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
1004    std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
1005   [(set_attr "length" "4,4,4,8,8,8")
1006    (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
1007
1008 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
       ;; Matches a rotate by 64 applied twice to a VSX_TI register, i.e. two
       ;; swaps that cancel.  Alternative 0 ties the destination to the source
       ;; and is output as "#" with length 0; alternative 1 copies the register
       ;; with xxlor.  The split replaces the double rotate with a plain move
       ;; of operand 1 to operand 0.
1009   [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
1010         (rotate:VSX_TI
1011          (rotate:VSX_TI
1012           (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
1013           (const_int 64))
1014          (const_int 64)))]
1015   "!BYTES_BIG_ENDIAN && TARGET_VSX"
1016   "@
1017    #
1018    xxlor %x0,%x1"
1019   ""
1020   [(set (match_dup 0) (match_dup 1))]
1021 {
       /* Once reload is done and both operands are the same register the move
          would be a no-op, so only a NOTE_INSN_DELETED note is emitted.  */
1022   if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
1023     {
1024       emit_note (NOTE_INSN_DELETED);
1025       DONE;
1026     }
1027 }
1028   [(set_attr "length" "0,4")
1029    (set_attr "type" "veclogical")])
1030
1031 (define_insn_and_split "*vsx_le_perm_load_<mode>"
       ;; Load of a VSX_LE_128 value from memory on little-endian VSX without
       ;; TARGET_P9_VECTOR.  Both alternatives ("Z" memory into a <VSa>
       ;; register, "Q" memory into an "r" register) are output as "#" and are
       ;; split into two rs6000_emit_le_vsx_permute steps.
1032   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
1033         (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1034   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1035   "@
1036    #
1037    #"
1038   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1039   [(const_int 0)]
1040   "
1041 {
       /* Intermediate register: a fresh pseudo when one can be created,
          otherwise the destination register itself.  */
1042   rtx tmp = (can_create_pseudo_p ()
1043              ? gen_reg_rtx_and_attrs (operands[0])
1044              : operands[0]);
       /* First permute: memory -> tmp.  Second permute: tmp -> destination.  */
1045   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1046   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1047   DONE;
1048 }
1049   "
1050   [(set_attr "type" "vecload,load")
1051    (set_attr "length" "8,8")])
1052
1053 (define_insn "*vsx_le_perm_store_<mode>"
       ;; Store of a VSX_LE_128 register to memory on little-endian VSX without
       ;; TARGET_P9_VECTOR.  Both alternatives are output as "#", so the insn
       ;; must be split; separate define_splits for this shape exist before
       ;; and after reload.  Operand 1 is marked read/write ("+") in the first
       ;; alternative.
1054   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1055         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
1056   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1057   "@
1058    #
1059    #"
1060   [(set_attr "type" "vecstore,store")
1061    (set_attr "length" "12,8")])
1062
1063 (define_split
       ;; Pre-reload split of a VSX_LE_128 register-to-memory store: permute the
       ;; source into a temporary, then permute the temporary into memory.
1064   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
1065         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
1066   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
1067   [(const_int 0)]
1068 {
       /* Temporary with the mode and attributes of operand 0 when a pseudo can
          be created.
          NOTE(review): the fallback is operands[0], which is the memory
          destination here, whereas the vector-mode store splits fall back to
          operands[1] (the source register).  Confirm the fallback path is
          unreachable or intended.  */
1069   rtx tmp = (can_create_pseudo_p ()
1070              ? gen_reg_rtx_and_attrs (operands[0])
1071              : operands[0]);
1072   rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1073   rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1074   DONE;
1075 })
1076
1077 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1078 ;; GPR registers on a little endian system.
     ;; Load form: operand 0 receives memory operand 1 rotated by 64, and
     ;; operand 2 then receives operand 0 rotated by 64.  When operand 2 is the
     ;; same rtx as operand 0, or operand 0 is dead after the second insn, the
     ;; pair is replaced by a plain move of operand 1 into operand 2.
1079 (define_peephole2
1080   [(set (match_operand:VSX_TI 0 "int_reg_operand")
1081         (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1082                        (const_int 64)))
1083    (set (match_operand:VSX_TI 2 "int_reg_operand")
1084         (rotate:VSX_TI (match_dup 0)
1085                        (const_int 64)))]
1086   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1087    && (rtx_equal_p (operands[0], operands[2])
1088        || peep2_reg_dead_p (2, operands[0]))"
1089    [(set (match_dup 2) (match_dup 1))])
1090
1091 (define_peephole2
       ;; Store form of the GPR peephole: operand 0 receives integer register
       ;; operand 1 rotated by 64, and memory operand 2 then receives operand 0
       ;; rotated by 64.  When operand 0 is dead after the second insn, the
       ;; pair is replaced by a plain move of operand 1 into operand 2.
1092   [(set (match_operand:VSX_TI 0 "int_reg_operand")
1093         (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1094                        (const_int 64)))
1095    (set (match_operand:VSX_TI 2 "memory_operand")
1096         (rotate:VSX_TI (match_dup 0)
1097                        (const_int 64)))]
1098   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1099    && peep2_reg_dead_p (2, operands[0])"
1100    [(set (match_dup 2) (match_dup 1))])
1101
1102 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1103 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
1104 ;; floating point are handled by the more generic swap elimination pass.
     ;; Two back-to-back rotates by 64 (operand 1 -> operand 0 -> operand 2) are
     ;; replaced by a plain move of operand 1 into operand 2 when operand 2 is
     ;; the same rtx as operand 0 or operand 0 is dead after the second insn.
     ;; NOTE(review): as written, all three operands use the
     ;; vsx_register_operand predicate, so no memory operand is matched here;
     ;; confirm whether the "memory to memory" wording above is still accurate.
1105 (define_peephole2
1106   [(set (match_operand:TI 0 "vsx_register_operand" "")
1107         (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
1108                    (const_int 64)))
1109    (set (match_operand:TI 2 "vsx_register_operand" "")
1110         (rotate:TI (match_dup 0)
1111                    (const_int 64)))]
1112   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1113    && (rtx_equal_p (operands[0], operands[2])
1114        || peep2_reg_dead_p (2, operands[0]))"
1115    [(set (match_dup 2) (match_dup 1))])
1116
1117 ;; The post-reload split requires that we re-permute the source
1118 ;; register in case it is still live.
     ;; VSX_LE_128 variant: three rs6000_emit_le_vsx_permute calls, with no
     ;; temporary register.
1119 (define_split
1120   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
1121         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
1122   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1123   [(const_int 0)]
1124 {
       /* Permute the source in place, permute it into memory, then permute the
          source in place again.  */
1125   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1126   rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1127   rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1128   DONE;
1129 })
1130
1131 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
1132 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
     ;; This pattern is the vec_duplicate form for V16QI: operand 1 is a
     ;; constant accepted by s8bit_cint_operand.
1133 (define_insn "xxspltib_v16qi"
1134   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1135         (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1136   "TARGET_P9_VECTOR"
1137 {
       /* Print only the low 8 bits of the constant as the immediate.  */
1138   operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1139   return "xxspltib %x0,%2";
1140 }
1141   [(set_attr "type" "vecperm")])
1142
1143 (define_insn "xxspltib_<mode>_nosplit"
       ;; Load a VSINT_842 vector constant with a single xxspltib.  Operand 1
       ;; must satisfy xxspltib_constant_nosplit.
1144   [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1145         (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1146   "TARGET_P9_VECTOR"
1147 {
1148   rtx op1 = operands[1];
1149   int value = 256;
1150   int num_insns = -1;
1151
       /* xxspltib_constant_p fills in the splat value and the instruction
          count; anything other than a one-instruction constant is treated as
          impossible here.  */
1152   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1153       || num_insns != 1)
1154     gcc_unreachable ();
1155
       /* Print only the low 8 bits of the value as the immediate.  */
1156   operands[2] = GEN_INT (value & 0xff);
1157   return "xxspltib %x0,%2";
1158 }
1159   [(set_attr "type" "vecperm")])
1160
1161 (define_insn_and_split "*xxspltib_<mode>_split"
       ;; Load a VSINT_842 vector constant that needs two instructions: an
       ;; xxspltib into a V16QI register followed by a widening of the bytes to
       ;; the element size of <MODE>.  Always split ("&& 1").
1162   [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1163         (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1164   "TARGET_P9_VECTOR"
1165   "#"
1166   "&& 1"
1167   [(const_int 0)]
1168 {
1169   int value = 256;
1170   int num_insns = -1;
1171   rtx op0 = operands[0];
1172   rtx op1 = operands[1];
       /* V16QI intermediate: a fresh pseudo when possible, otherwise the
          destination register viewed in V16QImode.  */
1173   rtx tmp = ((can_create_pseudo_p ())
1174              ? gen_reg_rtx (V16QImode)
1175              : gen_lowpart (V16QImode, op0));
1176
       /* Only constants reported as needing exactly two instructions are
          expected here.  */
1177   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1178       || num_insns != 2)
1179     gcc_unreachable ();
1180
1181   emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1182
       /* Second instruction: pick the extension pattern for the destination
          mode.  */
1183   if (<MODE>mode == V2DImode)
1184     emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1185
1186   else if (<MODE>mode == V4SImode)
1187     emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1188
1189   else if (<MODE>mode == V8HImode)
1190     emit_insn (gen_altivec_vupkhsb  (op0, tmp));
1191
1192   else
1193     gcc_unreachable ();
1194
1195   DONE;
1196 }
1197   [(set_attr "type" "vecperm")
1198    (set_attr "length" "8")])
1199
1200
1201 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
1202 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1203 ;; all 1's, since the machine does not have to wait for the previous
1204 ;; instruction using the register being set (such as a store waiting on a slow
1205 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1206
     ;; 64-bit (TARGET_POWERPC64) move pattern for the VSX_M modes.  At least one
     ;; operand must be a register; the assembly text comes from
     ;; rs6000_output_move_128bit.  The table below labels the 18 alternatives
     ;; in the order used by the constraint and attribute strings.
1207 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
1208 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
1209 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
1210 (define_insn "*vsx_mov<mode>_64bit"
1211   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1212                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
1213                 ?&r,       ??r,       ??Y,       ??r,       wo,        v,
1214                 ?<VSa>,    *r,        v,         ??r,       wZ,        v")
1215
1216         (match_operand:VSX_M 1 "input_operand"
1217                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
1218                 wQ,        Y,         r,         r,         wE,        jwM,
1219                 ?jwM,      jwM,       W,         W,         v,         wZ"))]
1220
1221   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1222    && (register_operand (operands[0], <MODE>mode)
1223        || register_operand (operands[1], <MODE>mode))"
1224 {
1225   return rs6000_output_move_128bit (operands);
1226 }
1227   [(set_attr "type"
1228                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
1229                 store,     load,      store,     *,         vecsimple, vecsimple,
1230                 vecsimple, *,         *,         *,         vecstore,  vecload")
1231
1232    (set_attr "length"
1233                "4,         4,         4,         8,         4,         8,
1234                 8,         8,         8,         8,         4,         4,
1235                 4,         8,         20,        20,        4,         4")])
1236
1237 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
1238 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
1239 ;;              LVX (VMX)  STVX (VMX)
     ;; 32-bit (!TARGET_POWERPC64) move pattern for the VSX_M modes.  At least one
     ;; operand must be a register; the assembly text comes from
     ;; rs6000_output_move_128bit.  The table above labels the 14 alternatives
     ;; in the order used by the constraint and attribute strings.
1240 (define_insn "*vsx_mov<mode>_32bit"
1241   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1242                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       ??r,
1243                 wo,        v,         ?<VSa>,    *r,        v,         ??r,
1244                 wZ,        v")
1245
1246         (match_operand:VSX_M 1 "input_operand"
1247                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
1248                 wE,        jwM,       ?jwM,      jwM,       W,         W,
1249                 v,         wZ"))]
1250
1251   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1252    && (register_operand (operands[0], <MODE>mode)
1253        || register_operand (operands[1], <MODE>mode))"
1254 {
1255   return rs6000_output_move_128bit (operands);
1256 }
1257   [(set_attr "type"
1258                "vecstore,  vecload,   vecsimple, load,      store,    *,
1259                 vecsimple, vecsimple, vecsimple, *,         *,        *,
1260                 vecstore,  vecload")
1261
1262    (set_attr "length"
1263                "4,         4,         4,         16,        16,        16,
1264                 4,         4,         4,         16,        20,        32,
1265                 4,         4")])
1266
1267 ;; Explicit load/store expanders for the builtin functions.
     ;; vsx_load_<mode>: load memory operand 1 into VSX register operand 0.  On
     ;; little-endian targets without TARGET_P9_VECTOR the move is emitted by
     ;; rs6000_emit_le_vsx_move; otherwise the plain set pattern is used.
1268 (define_expand "vsx_load_<mode>"
1269   [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
1270         (match_operand:VSX_M 1 "memory_operand" ""))]
1271   "VECTOR_MEM_VSX_P (<MODE>mode)"
1272 {
1273   /* Expand to swaps if needed, prior to swap optimization.  */
1274   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1275     {
1276       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1277       DONE;
1278     }
1279 })
1280
1281 (define_expand "vsx_store_<mode>"
       ;; Store VSX register operand 1 to memory operand 0.  On little-endian
       ;; targets without TARGET_P9_VECTOR the move is emitted by
       ;; rs6000_emit_le_vsx_move; otherwise the plain set pattern is used.
1282   [(set (match_operand:VSX_M 0 "memory_operand" "")
1283         (match_operand:VSX_M 1 "vsx_register_operand" ""))]
1284   "VECTOR_MEM_VSX_P (<MODE>mode)"
1285 {
1286   /* Expand to swaps if needed, prior to swap optimization.  */
1287   if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1288     {
1289       rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1290       DONE;
1291     }
1292 })
1293
1294 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1295 ;; when you really want their element-reversing behavior.
     ;; V2DI element-reversing load (little-endian only): the two elements of
     ;; memory operand 1 are selected in the order 1,0 and the insn is output
     ;; as a single lxvd2x.
1296 (define_insn "vsx_ld_elemrev_v2di"
1297   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1298         (vec_select:V2DI
1299           (match_operand:V2DI 1 "memory_operand" "Z")
1300           (parallel [(const_int 1) (const_int 0)])))]
1301   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1302   "lxvd2x %x0,%y1"
1303   [(set_attr "type" "vecload")])
1304
1305 (define_insn "vsx_ld_elemrev_v1ti"
       ;; V1TI "element-reversing" load (little-endian only).  The single
       ;; element is selected with index 0; the output is an lxvd2x followed by
       ;; an xxpermdi on the loaded register.  Two 4-byte instructions are
       ;; printed, so the length attribute is set to 8 explicitly instead of
       ;; relying on the default.
1306   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1307         (vec_select:V1TI
1308           (match_operand:V1TI 1 "memory_operand" "Z")
1309           (parallel [(const_int 0)])))]
1310   "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1311 {
1312    return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1313 }
1314   [(set_attr "type" "vecload")
        (set_attr "length" "8")])
1315
1316 (define_insn "vsx_ld_elemrev_v2df"
       ;; V2DF element-reversing load (little-endian only): elements of memory
       ;; operand 1 are selected in the order 1,0; output as a single lxvd2x.
1317   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1318         (vec_select:V2DF
1319           (match_operand:V2DF 1 "memory_operand" "Z")
1320           (parallel [(const_int 1) (const_int 0)])))]
1321   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1322   "lxvd2x %x0,%y1"
1323   [(set_attr "type" "vecload")])
1324
1325 (define_insn "vsx_ld_elemrev_v4si"
       ;; V4SI element-reversing load (little-endian only): elements of memory
       ;; operand 1 are selected in the order 3,2,1,0; output as a single
       ;; lxvw4x.
1326   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1327         (vec_select:V4SI
1328           (match_operand:V4SI 1 "memory_operand" "Z")
1329           (parallel [(const_int 3) (const_int 2)
1330                      (const_int 1) (const_int 0)])))]
1331   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1332   "lxvw4x %x0,%y1"
1333   [(set_attr "type" "vecload")])
1334
1335 (define_insn "vsx_ld_elemrev_v4sf"
       ;; V4SF element-reversing load (little-endian only): elements of memory
       ;; operand 1 are selected in the order 3,2,1,0; output as a single
       ;; lxvw4x.
1336   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1337         (vec_select:V4SF
1338           (match_operand:V4SF 1 "memory_operand" "Z")
1339           (parallel [(const_int 3) (const_int 2)
1340                      (const_int 1) (const_int 0)])))]
1341   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1342   "lxvw4x %x0,%y1"
1343   [(set_attr "type" "vecload")])
1344
1345 (define_expand "vsx_ld_elemrev_v8hi"
       ;; V8HI element-reversing load (little-endian only).  With
       ;; TARGET_P9_VECTOR the C fragment emits nothing and the pattern below is
       ;; generated as written.  Otherwise the load is done as a V4SI
       ;; element-reversing load followed by a vperm with a constant selector.
1346   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1347         (vec_select:V8HI
1348           (match_operand:V8HI 1 "memory_operand" "Z")
1349           (parallel [(const_int 7) (const_int 6)
1350                      (const_int 5) (const_int 4)
1351                      (const_int 3) (const_int 2)
1352                      (const_int 1) (const_int 0)])))]
1353   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1354 {
1355   if (!TARGET_P9_VECTOR)
1356     {
1357       rtx tmp = gen_reg_rtx (V4SImode);
1358       rtx subreg, subreg2, perm[16], pcv;
1359       /* 2 is leftmost element in register */
1360       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1361       int i;
1362
           /* View the memory operand as V4SI, load it element-reversed into
              tmp, then view tmp as V8HI again.  */
1363       subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1364       emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1365       subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1366
           /* Build the 16-entry V16QI selector from reorder[] and put it in a
              register.  */
1367       for (i = 0; i < 16; ++i)
1368         perm[i] = GEN_INT (reorder[i]);
1369
1370       pcv = force_reg (V16QImode,
1371                        gen_rtx_CONST_VECTOR (V16QImode,
1372                                              gen_rtvec_v (16, perm)));
           /* Permute the loaded value (used as both vperm inputs) into the
              destination.  */
1373       emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1374                                                 subreg2, pcv));
1375       DONE;
1376     }
1377 })
1378
1379 (define_insn "*vsx_ld_elemrev_v8hi_internal"
       ;; V8HI element-reversing load for little-endian TARGET_P9_VECTOR:
       ;; elements are selected in the order 7..0; output as a single lxvh8x.
1380   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1381         (vec_select:V8HI
1382           (match_operand:V8HI 1 "memory_operand" "Z")
1383           (parallel [(const_int 7) (const_int 6)
1384                      (const_int 5) (const_int 4)
1385                      (const_int 3) (const_int 2)
1386                      (const_int 1) (const_int 0)])))]
1387   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1388   "lxvh8x %x0,%y1"
1389   [(set_attr "type" "vecload")])
1390
1391 (define_expand "vsx_ld_elemrev_v16qi"
       ;; V16QI element-reversing load (little-endian only).  With
       ;; TARGET_P9_VECTOR the C fragment emits nothing and the pattern below is
       ;; generated as written.  Otherwise the load is done as a V4SI
       ;; element-reversing load followed by a vperm with a constant selector.
1392   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1393         (vec_select:V16QI
1394           (match_operand:V16QI 1 "memory_operand" "Z")
1395           (parallel [(const_int 15) (const_int 14)
1396                      (const_int 13) (const_int 12)
1397                      (const_int 11) (const_int 10)
1398                      (const_int  9) (const_int  8)
1399                      (const_int  7) (const_int  6)
1400                      (const_int  5) (const_int  4)
1401                      (const_int  3) (const_int  2)
1402                      (const_int  1) (const_int  0)])))]
1403   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1404 {
1405   if (!TARGET_P9_VECTOR)
1406     {
1407       rtx tmp = gen_reg_rtx (V4SImode);
1408       rtx subreg, subreg2, perm[16], pcv;
1409       /* 3 is leftmost element in register */
1410       unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1411       int i;
1412
           /* View the memory operand as V4SI, load it element-reversed into
              tmp, then view tmp as V16QI again.  */
1413       subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1414       emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1415       subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1416
           /* Build the 16-entry V16QI selector from reorder[] and put it in a
              register.  */
1417       for (i = 0; i < 16; ++i)
1418         perm[i] = GEN_INT (reorder[i]);
1419
1420       pcv = force_reg (V16QImode,
1421                        gen_rtx_CONST_VECTOR (V16QImode,
1422                                              gen_rtvec_v (16, perm)));
           /* Permute the loaded value (used as both vperm inputs) into the
              destination.  */
1423       emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1424                                                  subreg2, pcv));
1425       DONE;
1426     }
1427 })
1428
1429 (define_insn "*vsx_ld_elemrev_v16qi_internal"
       ;; V16QI element-reversing load for little-endian TARGET_P9_VECTOR:
       ;; elements are selected in the order 15..0; output as a single lxvb16x.
1430   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1431         (vec_select:V16QI
1432           (match_operand:V16QI 1 "memory_operand" "Z")
1433           (parallel [(const_int 15) (const_int 14)
1434                      (const_int 13) (const_int 12)
1435                      (const_int 11) (const_int 10)
1436                      (const_int  9) (const_int  8)
1437                      (const_int  7) (const_int  6)
1438                      (const_int  5) (const_int  4)
1439                      (const_int  3) (const_int  2)
1440                      (const_int  1) (const_int  0)])))]
1441   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1442   "lxvb16x %x0,%y1"
1443   [(set_attr "type" "vecload")])
1444
1445 (define_insn "vsx_st_elemrev_v1ti"
       ;; V1TI "element-reversing" store (little-endian only).  The source
       ;; register is permuted in place with xxpermdi and then stored with
       ;; stxvd2x, which is why operand 1 is read/write ("+wa") and is also
       ;; listed as clobbered.  The condition tests V1TImode, matching the
       ;; mode of the operands and the vsx_ld_elemrev_v1ti counterpart.  Two
       ;; 4-byte instructions are printed, so the length attribute is 8.
1446   [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1447         (vec_select:V1TI
1448           (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1449           (parallel [(const_int 0)])))
1450    (clobber (match_dup 1))]
1451   "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1452 {
1453   return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1454 }
1455   [(set_attr "type" "vecstore")
        (set_attr "length" "8")])
1456
1457 (define_insn "vsx_st_elemrev_v2df"
       ;; V2DF element-reversing store (little-endian only): elements of
       ;; register operand 1 are selected in the order 1,0; output as a single
       ;; stxvd2x.
1458   [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1459         (vec_select:V2DF
1460           (match_operand:V2DF 1 "vsx_register_operand" "wa")
1461           (parallel [(const_int 1) (const_int 0)])))]
1462   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1463   "stxvd2x %x1,%y0"
1464   [(set_attr "type" "vecstore")])
1465
1466 (define_insn "vsx_st_elemrev_v2di"
       ;; V2DI element-reversing store (little-endian only): elements of
       ;; register operand 1 are selected in the order 1,0; output as a single
       ;; stxvd2x.
1467   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1468         (vec_select:V2DI
1469           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1470           (parallel [(const_int 1) (const_int 0)])))]
1471   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1472   "stxvd2x %x1,%y0"
1473   [(set_attr "type" "vecstore")])
1474
1475 (define_insn "vsx_st_elemrev_v4sf"
       ;; V4SF element-reversing store (little-endian only): elements of
       ;; register operand 1 are selected in the order 3,2,1,0; output as a
       ;; single stxvw4x.
1476   [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1477         (vec_select:V4SF
1478           (match_operand:V4SF 1 "vsx_register_operand" "wa")
1479           (parallel [(const_int 3) (const_int 2)
1480                      (const_int 1) (const_int 0)])))]
1481   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1482   "stxvw4x %x1,%y0"
1483   [(set_attr "type" "vecstore")])
1484
1485 (define_insn "vsx_st_elemrev_v4si"
       ;; V4SI element-reversing store (little-endian only): elements of
       ;; register operand 1 are selected in the order 3,2,1,0; output as a
       ;; single stxvw4x.
1486   [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1487         (vec_select:V4SI
1488           (match_operand:V4SI 1 "vsx_register_operand" "wa")
1489           (parallel [(const_int 3) (const_int 2)
1490                      (const_int 1) (const_int 0)])))]
1491   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1492   "stxvw4x %x1,%y0"
1493   [(set_attr "type" "vecstore")])
1494
1495 (define_expand "vsx_st_elemrev_v8hi"
       ;; V8HI element-reversing store (little-endian only).  With
       ;; TARGET_P9_VECTOR the C fragment emits nothing and the pattern below is
       ;; generated as written.  Otherwise the source is first permuted with
       ;; vperm into a temporary, which is then stored with the V4SI
       ;; element-reversing store.
1496   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1497         (vec_select:V8HI
1498           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1499           (parallel [(const_int 7) (const_int 6)
1500                      (const_int 5) (const_int 4)
1501                      (const_int 3) (const_int 2)
1502                      (const_int 1) (const_int 0)])))]
1503   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1504 {
1505   if (!TARGET_P9_VECTOR)
1506     {
1507       rtx mem_subreg, subreg, perm[16], pcv;
1508       rtx tmp = gen_reg_rtx (V8HImode);
1509       /* 2 is leftmost element in register */
1510       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1511       int i;
1512
           /* Build the 16-entry V16QI selector from reorder[] and put it in a
              register.  */
1513       for (i = 0; i < 16; ++i)
1514         perm[i] = GEN_INT (reorder[i]);
1515
1516       pcv = force_reg (V16QImode,
1517                        gen_rtx_CONST_VECTOR (V16QImode,
1518                                              gen_rtvec_v (16, perm)));
           /* Permute the source (used as both vperm inputs) into tmp.  */
1519       emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1520                                                 operands[1], pcv));
           /* View tmp and the destination memory as V4SI and store with the
              V4SI element-reversing pattern.  */
1521       subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1522       mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1523       emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1524       DONE;
1525     }
1526 })
1527
1528 (define_insn "*vsx_st_elemrev_v2di_internal"
       ;; V2DI element-reversing store restricted to TARGET_P9_VECTOR; output as
       ;; a single stxvd2x.
       ;; NOTE(review): the RTL template and output are the same as in the
       ;; named vsx_st_elemrev_v2di pattern earlier in this file, whose
       ;; condition is a superset of this one; this pattern looks redundant.
       ;; Confirm before relying on it.
1529   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1530         (vec_select:V2DI
1531           (match_operand:V2DI 1 "vsx_register_operand" "wa")
1532           (parallel [(const_int 1) (const_int 0)])))]
1533   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1534   "stxvd2x %x1,%y0"
1535   [(set_attr "type" "vecstore")])
1536
1537 (define_insn "*vsx_st_elemrev_v8hi_internal"
       ;; V8HI element-reversing store for little-endian TARGET_P9_VECTOR:
       ;; elements of register operand 1 are selected in the order 7..0; output
       ;; as a single stxvh8x.
1538   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1539         (vec_select:V8HI
1540           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1541           (parallel [(const_int 7) (const_int 6)
1542                      (const_int 5) (const_int 4)
1543                      (const_int 3) (const_int 2)
1544                      (const_int 1) (const_int 0)])))]
1545   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1546   "stxvh8x %x1,%y0"
1547   [(set_attr "type" "vecstore")])
1548
;; Expander for an element-reversing store of a V16QI register: the RTL
;; template stores operand 1 to memory operand 0 with its sixteen elements
;; selected in the order 15..0.  Only available when
;; VECTOR_MEM_VSX_P (V16QImode) holds on a little-endian target.
;; When TARGET_P9_VECTOR is set the preparation code does nothing and the
;; template itself is emitted.  Otherwise the preparation code:
;;   1. builds a 16-entry V16QI permute control vector from reorder[],
;;   2. permutes operand 1 (used as both vperm inputs) into a new V16QI
;;      pseudo with gen_altivec_vperm_v16qi_direct,
;;   3. views that pseudo and the destination memory as V4SI via subregs,
;;   4. emits gen_vsx_st_elemrev_v4si on those views and ends with DONE.
1549 (define_expand "vsx_st_elemrev_v16qi"
1550   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1551         (vec_select:V16QI
1552           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1553           (parallel [(const_int 15) (const_int 14)
1554                      (const_int 13) (const_int 12)
1555                      (const_int 11) (const_int 10)
1556                      (const_int  9) (const_int  8)
1557                      (const_int  7) (const_int  6)
1558                      (const_int  5) (const_int  4)
1559                      (const_int  3) (const_int  2)
1560                      (const_int  1) (const_int  0)])))]
1561   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1562 {
1563   if (!TARGET_P9_VECTOR)
1564     {
1565       rtx mem_subreg, subreg, perm[16], pcv;
1566       rtx tmp = gen_reg_rtx (V16QImode);
1567       /* 3 is leftmost element in register */
1568       unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1569       int i;
1570
1571       for (i = 0; i < 16; ++i)
1572         perm[i] = GEN_INT (reorder[i]);
1573
1574       pcv = force_reg (V16QImode,
1575                        gen_rtx_CONST_VECTOR (V16QImode,
1576                                              gen_rtvec_v (16, perm)));
1577       emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1578                                                  operands[1], pcv));
1579       subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1580       mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1581       emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1582       DONE;
1583     }
1584 })
1585
;; Insn for a V16QI store to memory with the sixteen elements reversed
;; (vec_select order 15..0).  Matches only on little-endian VSX targets with
;; TARGET_P9_VECTOR and is output as stxvb16x.  Unnamed ('*'), so it is only
;; reached by matching RTL of this exact shape.
1586 (define_insn "*vsx_st_elemrev_v16qi_internal"
1587   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1588         (vec_select:V16QI
1589           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1590           (parallel [(const_int 15) (const_int 14)
1591                      (const_int 13) (const_int 12)
1592                      (const_int 11) (const_int 10)
1593                      (const_int  9) (const_int  8)
1594                      (const_int  7) (const_int  6)
1595                      (const_int  5) (const_int  4)
1596                      (const_int  3) (const_int  2)
1597                      (const_int  1) (const_int  0)])))]
1598   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1599   "stxvb16x %x1,%y0"
1600   [(set_attr "type" "vecstore")])
1601
1602 \f
1603 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1604 ;; instructions are now combined with the insn for the traditional floating
1605 ;; point unit.
;; Vector floating-point add, instantiated for each mode of the VSX_F
;; iterator.  Two register alternatives: every operand in a <VSr> register,
;; or (slightly disparaged by '?') every operand in a <VSa> register.
;; Output is xvadd<VSs>; type/fp_type come from the per-mode attributes.
1606 (define_insn "*vsx_add<mode>3"
1607   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1608         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1609                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1610   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1611   "xvadd<VSs> %x0,%x1,%x2"
1612   [(set_attr "type" "<VStype_simple>")
1613    (set_attr "fp_type" "<VSfptype_simple>")])
1614
;; Vector floating-point subtract (operand 1 minus operand 2), instantiated
;; for each mode of the VSX_F iterator.  Same two register alternatives as
;; the other simple VSX_F arithmetic insns (<VSr>, or disparaged <VSa>).
;; Output is xvsub<VSs>.
1615 (define_insn "*vsx_sub<mode>3"
1616   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1617         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1618                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1619   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1620   "xvsub<VSs> %x0,%x1,%x2"
1621   [(set_attr "type" "<VStype_simple>")
1622    (set_attr "fp_type" "<VSfptype_simple>")])
1623
;; Vector floating-point multiply, instantiated for each mode of the VSX_F
;; iterator.  Two register alternatives (<VSr>, or disparaged <VSa>).
;; Output is xvmul<VSs>.  Note the attributes: "type" uses the simple
;; attribute while "fp_type" uses the multiply-specific <VSfptype_mul>.
1624 (define_insn "*vsx_mul<mode>3"
1625   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1626         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1627                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1628   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1629   "xvmul<VSs> %x0,%x1,%x2"
1630   [(set_attr "type" "<VStype_simple>")
1631    (set_attr "fp_type" "<VSfptype_mul>")])
1632
1633 ; Emulate vector with scalar for vec_mul in V2DImode
;; V2DI multiply, represented as UNSPEC_VSX_MULSD of operands 1 and 2.
;; The insn has no assembler form (output template "#"), so it must be
;; split.  The split condition requires !reload_completed; the split code
;; allocates three new DImode pseudos with gen_reg_rtx.
;; Split sequence, per element index 0 then 1:
;;   - extract the DImode element of each input (gen_vsx_extract_v2di),
;;   - multiply them: gen_muldi3 when TARGET_POWERPC64, otherwise
;;     expand_mult followed by a move of its result,
;; then combine the two products into operand 0 with gen_vsx_concat_v2di
;; (element-0 product first).
1634 (define_insn_and_split "vsx_mul_v2di"
1635   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1636         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1637                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1638                      UNSPEC_VSX_MULSD))]
1639   "VECTOR_MEM_VSX_P (V2DImode)"
1640   "#"
1641   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1642   [(const_int 0)]
1643   "
1644 {
1645   rtx op0 = operands[0];
1646   rtx op1 = operands[1];
1647   rtx op2 = operands[2];
1648   rtx op3 = gen_reg_rtx (DImode);
1649   rtx op4 = gen_reg_rtx (DImode);
1650   rtx op5 = gen_reg_rtx (DImode);
1651   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1652   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1653   if (TARGET_POWERPC64)
1654     emit_insn (gen_muldi3 (op5, op3, op4));
1655   else
1656     {
1657       rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1658       emit_move_insn (op5, ret);
1659     }
1660   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1661   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1662   if (TARGET_POWERPC64)
1663     emit_insn (gen_muldi3 (op3, op3, op4));
1664   else
1665     {
1666       rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1667       emit_move_insn (op3, ret);
1668     }
1669   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1670   DONE;
1671 }"
1672   [(set_attr "type" "mul")])
1673
;; Vector floating-point divide (operand 1 divided by operand 2),
;; instantiated for each mode of the VSX_F iterator.  Two register
;; alternatives (<VSr>, or disparaged <VSa>).  Output is xvdiv<VSs>; the
;; scheduling attributes use the divide-specific <VStype_div> and
;; <VSfptype_div>.
1674 (define_insn "*vsx_div<mode>3"
1675   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1676         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1677                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1678   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1679   "xvdiv<VSs> %x0,%x1,%x2"
1680   [(set_attr "type" "<VStype_div>")
1681    (set_attr "fp_type" "<VSfptype_div>")])
1682
1683 ; Emulate vector with scalar for vec_div in V2DImode
;; Signed V2DI divide, represented as UNSPEC_VSX_DIVSD of operands 1 and 2.
;; The insn has no assembler form (output template "#"), so it must be
;; split.  The split condition requires !reload_completed; the split code
;; allocates three new DImode pseudos with gen_reg_rtx.
;; Split sequence, per element index 0 then 1:
;;   - extract the DImode element of each input (gen_vsx_extract_v2di),
;;   - divide them: gen_divdi3 when TARGET_POWERPC64, otherwise a call to
;;     the library routine returned by optab_libfunc (sdiv_optab, DImode),
;;     made with emit_library_call_value and followed by a move,
;; then combine the two quotients into operand 0 with gen_vsx_concat_v2di
;; (element-0 quotient first).
1684 (define_insn_and_split "vsx_div_v2di"
1685   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1686         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1687                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1688                      UNSPEC_VSX_DIVSD))]
1689   "VECTOR_MEM_VSX_P (V2DImode)"
1690   "#"
1691   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1692   [(const_int 0)]
1693   "
1694 {
1695   rtx op0 = operands[0];
1696   rtx op1 = operands[1];
1697   rtx op2 = operands[2];
1698   rtx op3 = gen_reg_rtx (DImode);
1699   rtx op4 = gen_reg_rtx (DImode);
1700   rtx op5 = gen_reg_rtx (DImode);
1701   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1702   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1703   if (TARGET_POWERPC64)
1704     emit_insn (gen_divdi3 (op5, op3, op4));
1705   else
1706     {
1707       rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1708       rtx target = emit_library_call_value (libfunc,
1709                                             op5, LCT_NORMAL, DImode,
1710                                             op3, DImode,
1711                                             op4, DImode);
1712       emit_move_insn (op5, target);
1713     }
1714   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1715   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1716   if (TARGET_POWERPC64)
1717     emit_insn (gen_divdi3 (op3, op3, op4));
1718   else
1719     {
1720       rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1721       rtx target = emit_library_call_value (libfunc,
1722                                             op3, LCT_NORMAL, DImode,
1723                                             op3, DImode,
1724                                             op4, DImode);
1725       emit_move_insn (op3, target);
1726     }
1727   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1728   DONE;
1729 }"
1730   [(set_attr "type" "div")])
1731
;; Unsigned V2DI divide, represented as UNSPEC_VSX_DIVUD of operands 1 and
;; 2.  The insn has no assembler form (output template "#"), so it must be
;; split.  The split condition requires !reload_completed; the split code
;; allocates three new DImode pseudos with gen_reg_rtx.
;; Split sequence, per element index 0 then 1:
;;   - extract the DImode element of each input (gen_vsx_extract_v2di),
;;   - divide them: gen_udivdi3 when TARGET_POWERPC64, otherwise a call to
;;     the library routine returned by optab_libfunc (udiv_optab, DImode),
;;     made with emit_library_call_value and followed by a move,
;; then combine the two quotients into operand 0 with gen_vsx_concat_v2di
;; (element-0 quotient first).
1732 (define_insn_and_split "vsx_udiv_v2di"
1733   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1734         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1735                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1736                      UNSPEC_VSX_DIVUD))]
1737   "VECTOR_MEM_VSX_P (V2DImode)"
1738   "#"
1739   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1740   [(const_int 0)]
1741   "
1742 {
1743   rtx op0 = operands[0];
1744   rtx op1 = operands[1];
1745   rtx op2 = operands[2];
1746   rtx op3 = gen_reg_rtx (DImode);
1747   rtx op4 = gen_reg_rtx (DImode);
1748   rtx op5 = gen_reg_rtx (DImode);
1749   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1750   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1751   if (TARGET_POWERPC64)
1752     emit_insn (gen_udivdi3 (op5, op3, op4));
1753   else
1754     {
1755       rtx libfunc = optab_libfunc (udiv_optab, DImode);
1756       rtx target = emit_library_call_value (libfunc,
1757                                             op5, LCT_NORMAL, DImode,
1758                                             op3, DImode,
1759                                             op4, DImode);
1760       emit_move_insn (op5, target);
1761     }
1762   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1763   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1764   if (TARGET_POWERPC64)
1765     emit_insn (gen_udivdi3 (op3, op3, op4));
1766   else
1767     {
1768       rtx libfunc = optab_libfunc (udiv_optab, DImode);
1769       rtx target = emit_library_call_value (libfunc,
1770                                             op3, LCT_NORMAL, DImode,
1771                                             op3, DImode,
1772                                             op4, DImode);
1773       emit_move_insn (op3, target);
1774     }
1775   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1776   DONE;
1777 }"
1778   [(set_attr "type" "div")])
1779
1780 ;; *tdiv* instruction returning the FG flag
;; Expands to two sets: first UNSPEC_VSX_TDIV of operands 1 and 2 (modes
;; from the VSX_B iterator) is computed into a CCFP register, then SImode
;; operand 0 is set to (gt cc 0) on that register.  Operand 3 is not
;; supplied by the caller: the preparation code creates it as a fresh CCFP
;; pseudo.
1781 (define_expand "vsx_tdiv<mode>3_fg"
1782   [(set (match_dup 3)
1783         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1784                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1785                      UNSPEC_VSX_TDIV))
1786    (set (match_operand:SI 0 "gpc_reg_operand" "")
1787         (gt:SI (match_dup 3)
1788                (const_int 0)))]
1789   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1790 {
1791   operands[3] = gen_reg_rtx (CCFPmode);
1792 })
1793
1794 ;; *tdiv* instruction returning the FE flag
;; Expands to two sets: first UNSPEC_VSX_TDIV of operands 1 and 2 (modes
;; from the VSX_B iterator) is computed into a CCFP register, then SImode
;; operand 0 is set to (eq cc 0) on that register.  Operand 3 is not
;; supplied by the caller: the preparation code creates it as a fresh CCFP
;; pseudo.
1795 (define_expand "vsx_tdiv<mode>3_fe"
1796   [(set (match_dup 3)
1797         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1798                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1799                      UNSPEC_VSX_TDIV))
1800    (set (match_operand:SI 0 "gpc_reg_operand" "")
1801         (eq:SI (match_dup 3)
1802                (const_int 0)))]
1803   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1804 {
1805   operands[3] = gen_reg_rtx (CCFPmode);
1806 })
1807
;; Insn that matches the UNSPEC_VSX_TDIV set: the result goes to a CCFP
;; condition register (constraint "x" in both alternatives) and the two
;; VSX_B inputs are in <VSr> or <VSa> registers.  Output is
;; x<VSv>tdiv<VSs>, so the mnemonic is chosen per mode by the <VSv> and
;; <VSs> attributes.
1808 (define_insn "*vsx_tdiv<mode>3_internal"
1809   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1810         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1811                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1812                    UNSPEC_VSX_TDIV))]
1813   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1814   "x<VSv>tdiv<VSs> %0,%x1,%x2"
1815   [(set_attr "type" "<VStype_simple>")
1816    (set_attr "fp_type" "<VSfptype_simple>")])
1817
;; Named insn wrapping UNSPEC_FRES of a single VSX_F operand; output is
;; xvre<VSs> (presumably the vector reciprocal estimate -- confirm against
;; the ISA).  Two register alternatives (<VSr>, or disparaged <VSa>).
1818 (define_insn "vsx_fre<mode>2"
1819   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1820         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1821                       UNSPEC_FRES))]
1822   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1823   "xvre<VSs> %x0,%x1"
1824   [(set_attr "type" "<VStype_simple>")
1825    (set_attr "fp_type" "<VSfptype_simple>")])
1826
;; Vector floating-point negate for each mode of the VSX_F iterator.
;; Two register alternatives (<VSr>, or disparaged <VSa>).  Output is
;; xvneg<VSs>.
1827 (define_insn "*vsx_neg<mode>2"
1828   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1829         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1830   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1831   "xvneg<VSs> %x0,%x1"
1832   [(set_attr "type" "<VStype_simple>")
1833    (set_attr "fp_type" "<VSfptype_simple>")])
1834
;; Vector floating-point absolute value for each mode of the VSX_F
;; iterator.  Two register alternatives (<VSr>, or disparaged <VSa>).
;; Output is xvabs<VSs>.
1835 (define_insn "*vsx_abs<mode>2"
1836   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1837         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1838   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1839   "xvabs<VSs> %x0,%x1"
1840   [(set_attr "type" "<VStype_simple>")
1841    (set_attr "fp_type" "<VSfptype_simple>")])
1842
;; Negated absolute value: matches (neg (abs x)) for each mode of the VSX_F
;; iterator and outputs it as the single instruction xvnabs<VSs>.  Named
;; (no leading '*'), so a gen_vsx_nabs<mode>2 function is generated.
1843 (define_insn "vsx_nabs<mode>2"
1844   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1845         (neg:VSX_F
1846          (abs:VSX_F
1847           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1848   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1849   "xvnabs<VSs> %x0,%x1"
1850   [(set_attr "type" "<VStype_simple>")
1851    (set_attr "fp_type" "<VSfptype_simple>")])
1852
;; Vector floating-point maximum (smax) of operands 1 and 2 for each mode of
;; the VSX_F iterator.  Two register alternatives (<VSr>, or disparaged
;; <VSa>).  Output is xvmax<VSs>.  Named, so gen_vsx_smax<mode>3 exists.
1853 (define_insn "vsx_smax<mode>3"
1854   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1855         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1856                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1857   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1858   "xvmax<VSs> %x0,%x1,%x2"
1859   [(set_attr "type" "<VStype_simple>")
1860    (set_attr "fp_type" "<VSfptype_simple>")])
1861
;; Vector floating-point minimum (smin) of operands 1 and 2 for each mode of
;; the VSX_F iterator.  Two register alternatives (<VSr>, or disparaged
;; <VSa>).  Output is xvmin<VSs>.  Unnamed ('*'), unlike the smax pattern.
1862 (define_insn "*vsx_smin<mode>3"
1863   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1864         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1865                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1866   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1867   "xvmin<VSs> %x0,%x1,%x2"
1868   [(set_attr "type" "<VStype_simple>")
1869    (set_attr "fp_type" "<VSfptype_simple>")])
1870
;; Vector floating-point square root for each mode of the VSX_F iterator.
;; Two register alternatives (<VSr>, or disparaged <VSa>).  Output is
;; xvsqrt<VSs>; scheduling attributes use the sqrt-specific <VStype_sqrt>
;; and <VSfptype_sqrt>.
1871 (define_insn "*vsx_sqrt<mode>2"
1872   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1873         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1874   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1875   "xvsqrt<VSs> %x0,%x1"
1876   [(set_attr "type" "<VStype_sqrt>")
1877    (set_attr "fp_type" "<VSfptype_sqrt>")])
1878
;; Insn matching UNSPEC_RSQRT of a single VSX_F operand; output is
;; xvrsqrte<VSs> (presumably the reciprocal square-root estimate -- confirm
;; against the ISA).  Two register alternatives (<VSr>, or disparaged
;; <VSa>).
1879 (define_insn "*vsx_rsqrte<mode>2"
1880   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1881         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1882                       UNSPEC_RSQRT))]
1883   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1884   "xvrsqrte<VSs> %x0,%x1"
1885   [(set_attr "type" "<VStype_simple>")
1886    (set_attr "fp_type" "<VSfptype_simple>")])
1887
1888 ;; *tsqrt* returning the fg flag
;; Expands to two sets: first UNSPEC_VSX_TSQRT of operand 1 (modes from the
;; VSX_B iterator) is computed into a CCFP register, then SImode operand 0
;; is set to (gt cc 0) on that register.  Operand 2 is not supplied by the
;; caller: the preparation code creates it as a fresh CCFP pseudo.
1889 (define_expand "vsx_tsqrt<mode>2_fg"
1890   [(set (match_dup 2)
1891         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1892                      UNSPEC_VSX_TSQRT))
1893    (set (match_operand:SI 0 "gpc_reg_operand" "")
1894         (gt:SI (match_dup 2)
1895                (const_int 0)))]
1896   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1897 {
1898   operands[2] = gen_reg_rtx (CCFPmode);
1899 })
1900
1901 ;; *tsqrt* returning the fe flag
;; Expands to two sets: first UNSPEC_VSX_TSQRT of operand 1 (modes from the
;; VSX_B iterator) is computed into a CCFP register, then SImode operand 0
;; is set to (eq cc 0) on that register.  Operand 2 is not supplied by the
;; caller: the preparation code creates it as a fresh CCFP pseudo.
1902 (define_expand "vsx_tsqrt<mode>2_fe"
1903   [(set (match_dup 2)
1904         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1905                      UNSPEC_VSX_TSQRT))
1906    (set (match_operand:SI 0 "gpc_reg_operand" "")
1907         (eq:SI (match_dup 2)
1908                (const_int 0)))]
1909   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1910 {
1911   operands[2] = gen_reg_rtx (CCFPmode);
1912 })
1913
;; Insn that matches the UNSPEC_VSX_TSQRT set: the result goes to a CCFP
;; condition register (constraint "x" in both alternatives) and the VSX_B
;; input is in a <VSr> or <VSa> register.  Output is x<VSv>tsqrt<VSs>, so
;; the mnemonic is chosen per mode by the <VSv> and <VSs> attributes.
1914 (define_insn "*vsx_tsqrt<mode>2_internal"
1915   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1916         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1917                      UNSPEC_VSX_TSQRT))]
1918   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1919   "x<VSv>tsqrt<VSs> %0,%x1"
1920   [(set_attr "type" "<VStype_simple>")
1921    (set_attr "fp_type" "<VSfptype_simple>")])
1922
1923 ;; Fused vector multiply/add instructions.  Support the classical Altivec
1924 ;; versions of fma, which allow the target to be a separate register from the
1925 ;; 3 inputs.  Under VSX, the target must be either the addend or one of the
1926 ;; multiplicands.
1927
;; V4SF fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; Five alternatives, selected by which input is tied to the output:
;;   1,3: operand 3 (the addend) is tied to operand 0 ("0") -> xvmaddasp,
;;        printed with operands 1 and 2;
;;   2,4: operand 2 is tied to operand 0 -> xvmaddmsp, printed with
;;        operands 1 and 3;
;;   5:   all operands in "v" registers, no tie -> vmaddfp with all four
;;        operands.
;; Alternatives 1-2 use "wf" registers; 3-4 use "wa" and are disparaged by
;; '?'.  The '%' on operand 1 marks operands 1 and 2 as commutative.
1928 (define_insn "*vsx_fmav4sf4"
1929   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1930         (fma:V4SF
1931           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1932           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1933           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1934   "VECTOR_UNIT_VSX_P (V4SFmode)"
1935   "@
1936    xvmaddasp %x0,%x1,%x2
1937    xvmaddmsp %x0,%x1,%x3
1938    xvmaddasp %x0,%x1,%x2
1939    xvmaddmsp %x0,%x1,%x3
1940    vmaddfp %0,%1,%2,%3"
1941   [(set_attr "type" "vecfloat")])
1942
;; V2DF fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; Four alternatives, selected by which input is tied to the output:
;;   1,3: operand 3 (the addend) is tied to operand 0 ("0") -> xvmaddadp,
;;        printed with operands 1 and 2;
;;   2,4: operand 2 is tied to operand 0 -> xvmaddmdp, printed with
;;        operands 1 and 3.
;; Alternatives 1-2 use "wd" registers; 3-4 use "wa" and are disparaged by
;; '?'.  The '%' on operand 1 marks operands 1 and 2 as commutative.
1943 (define_insn "*vsx_fmav2df4"
1944   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1945         (fma:V2DF
1946           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1947           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1948           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1949   "VECTOR_UNIT_VSX_P (V2DFmode)"
1950   "@
1951    xvmaddadp %x0,%x1,%x2
1952    xvmaddmdp %x0,%x1,%x3
1953    xvmaddadp %x0,%x1,%x2
1954    xvmaddmdp %x0,%x1,%x3"
1955   [(set_attr "type" "vecdouble")])
1956
;; Fused multiply-subtract for each VSX_F mode: the RTL is an fma whose
;; third input is negated, i.e. operand 1 * operand 2 - operand 3.
;; Four alternatives, selected by which input is tied to the output:
;;   1,3: operand 3 is tied to operand 0 ("0") -> xvmsuba<VSs>, printed
;;        with operands 1 and 2;
;;   2,4: operand 2 is tied to operand 0 -> xvmsubm<VSs>, printed with
;;        operands 1 and 3.
;; Alternatives 3-4 (<VSa> registers) are disparaged by '?'.
;; NOTE(review): only "type" is set here; the neighbouring
;; *vsx_nfma<mode>4 also sets "fp_type" -- confirm the omission is intended.
1957 (define_insn "*vsx_fms<mode>4"
1958   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1959         (fma:VSX_F
1960           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1961           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1962           (neg:VSX_F
1963             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1964   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1965   "@
1966    xvmsuba<VSs> %x0,%x1,%x2
1967    xvmsubm<VSs> %x0,%x1,%x3
1968    xvmsuba<VSs> %x0,%x1,%x2
1969    xvmsubm<VSs> %x0,%x1,%x3"
1970   [(set_attr "type" "<VStype_mul>")])
1971
;; Negated fused multiply-add for each VSX_F mode: the RTL is the negation
;; of an fma, i.e. -(operand 1 * operand 2 + operand 3).
;; Four alternatives, selected by which input is tied to the output:
;;   1,3: operand 3 is tied to operand 0 ("0") -> xvnmadda<VSs>, printed
;;        with operands 1 and 2;
;;   2,4: operand 2 is tied to operand 0 -> xvnmaddm<VSs>, printed with
;;        operands 1 and 3.
;; Alternatives 3-4 (<VSa> registers) are disparaged by '?'.  Unlike the
;; other fma patterns here, operand 1 carries no '%' commutativity marker.
1972 (define_insn "*vsx_nfma<mode>4"
1973   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1974         (neg:VSX_F
1975          (fma:VSX_F
1976           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1977           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1978           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1979   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1980   "@
1981    xvnmadda<VSs> %x0,%x1,%x2
1982    xvnmaddm<VSs> %x0,%x1,%x3
1983    xvnmadda<VSs> %x0,%x1,%x2
1984    xvnmaddm<VSs> %x0,%x1,%x3"
1985   [(set_attr "type" "<VStype_mul>")
1986    (set_attr "fp_type" "<VSfptype_mul>")])
1987
;; V4SF negated fused multiply-subtract: the RTL is the negation of an fma
;; whose third input is negated, i.e. -(operand 1 * operand 2 - operand 3).
;; Five alternatives, selected by which input is tied to the output:
;;   1,3: operand 3 is tied to operand 0 ("0") -> xvnmsubasp, printed with
;;        operands 1 and 2;
;;   2,4: operand 2 is tied to operand 0 -> xvnmsubmsp, printed with
;;        operands 1 and 3;
;;   5:   all operands in "v" registers, no tie -> vnmsubfp with all four
;;        operands.
;; Alternatives 3-4 ("wa") are disparaged by '?'.
1988 (define_insn "*vsx_nfmsv4sf4"
1989   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1990         (neg:V4SF
1991          (fma:V4SF
1992            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1993            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1994            (neg:V4SF
1995              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1996   "VECTOR_UNIT_VSX_P (V4SFmode)"
1997   "@
1998    xvnmsubasp %x0,%x1,%x2
1999    xvnmsubmsp %x0,%x1,%x3
2000    xvnmsubasp %x0,%x1,%x2
2001    xvnmsubmsp %x0,%x1,%x3
2002    vnmsubfp %0,%1,%2,%3"
2003   [(set_attr "type" "vecfloat")])
2004
;; V2DF negated fused multiply-subtract: the RTL is the negation of an fma
;; whose third input is negated, i.e. -(operand 1 * operand 2 - operand 3).
;; Four alternatives, selected by which input is tied to the output:
;;   1,3: operand 3 is tied to operand 0 ("0") -> xvnmsubadp, printed with
;;        operands 1 and 2;
;;   2,4: operand 2 is tied to operand 0 -> xvnmsubmdp, printed with
;;        operands 1 and 3.
;; Alternatives 3-4 ("wa") are disparaged by '?'.
2005 (define_insn "*vsx_nfmsv2df4"
2006   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
2007         (neg:V2DF
2008          (fma:V2DF
2009            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
2010            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
2011            (neg:V2DF
2012              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
2013   "VECTOR_UNIT_VSX_P (V2DFmode)"
2014   "@
2015    xvnmsubadp %x0,%x1,%x2
2016    xvnmsubmdp %x0,%x1,%x3
2017    xvnmsubadp %x0,%x1,%x2
2018    xvnmsubmdp %x0,%x1,%x3"
2019   [(set_attr "type" "vecdouble")])
2020
2021 ;; Vector conditional expressions (no scalar version for these instructions)
;; Vector compare-equal of operands 1 and 2 for each VSX_F mode; the result
;; is itself a VSX_F-mode vector in operand 0.  Two register alternatives
;; (<VSr>, or disparaged <VSa>).  Output is xvcmpeq<VSs> (non-record form).
2022 (define_insn "vsx_eq<mode>"
2023   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2024         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2025                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2026   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2027   "xvcmpeq<VSs> %x0,%x1,%x2"
2028   [(set_attr "type" "<VStype_simple>")
2029    (set_attr "fp_type" "<VSfptype_simple>")])
2030
;; Vector compare-greater-than (operand 1 > operand 2) for each VSX_F mode;
;; the result is itself a VSX_F-mode vector in operand 0.  Two register
;; alternatives (<VSr>, or disparaged <VSa>).  Output is xvcmpgt<VSs>
;; (non-record form).
2031 (define_insn "vsx_gt<mode>"
2032   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2033         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2034                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2035   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2036   "xvcmpgt<VSs> %x0,%x1,%x2"
2037   [(set_attr "type" "<VStype_simple>")
2038    (set_attr "fp_type" "<VSfptype_simple>")])
2039
;; Vector compare-greater-or-equal (operand 1 >= operand 2) for each VSX_F
;; mode; the result is itself a VSX_F-mode vector in operand 0.  Two
;; register alternatives (<VSr>, or disparaged <VSa>).  Output is
;; xvcmpge<VSs> (non-record form).  Unnamed ('*'), unlike the eq/gt insns.
2040 (define_insn "*vsx_ge<mode>"
2041   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2042         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2043                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2044   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2045   "xvcmpge<VSs> %x0,%x1,%x2"
2046   [(set_attr "type" "<VStype_simple>")
2047    (set_attr "fp_type" "<VSfptype_simple>")])
2048
2049 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2050 ;; indicate a combined status
;; Compare-equal that produces two results in one insn: the hard register
;; CR6 (reg:CC CR6_REGNO) is set from an UNSPEC_PREDICATE wrapping the
;; comparison, and operand 0 receives the VSX_F-mode vector result of the
;; same comparison (operands 1 and 2 reused via match_dup).  Output is the
;; record form "xvcmpeq<VSs>." (trailing dot).  Only "type" is set; there is
;; no fp_type attribute.
2051 (define_insn "*vsx_eq_<mode>_p"
2052   [(set (reg:CC CR6_REGNO)
2053         (unspec:CC
2054          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2055                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2056          UNSPEC_PREDICATE))
2057    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2058         (eq:VSX_F (match_dup 1)
2059                   (match_dup 2)))]
2060   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2061   "xvcmpeq<VSs>. %x0,%x1,%x2"
2062   [(set_attr "type" "<VStype_simple>")])
2063
;; Compare-greater-than that produces two results in one insn: the hard
;; register CR6 (reg:CC CR6_REGNO) is set from an UNSPEC_PREDICATE wrapping
;; the comparison, and operand 0 receives the VSX_F-mode vector result of
;; the same comparison (operands 1 and 2 reused via match_dup).  Output is
;; the record form "xvcmpgt<VSs>." (trailing dot).  Only "type" is set;
;; there is no fp_type attribute.
2064 (define_insn "*vsx_gt_<mode>_p"
2065   [(set (reg:CC CR6_REGNO)
2066         (unspec:CC
2067          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2068                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2069          UNSPEC_PREDICATE))
2070    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2071         (gt:VSX_F (match_dup 1)
2072                   (match_dup 2)))]
2073   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2074   "xvcmpgt<VSs>. %x0,%x1,%x2"
2075   [(set_attr "type" "<VStype_simple>")])
2076
;; Compare-greater-or-equal that produces two results in one insn: the hard
;; register CR6 (reg:CC CR6_REGNO) is set from an UNSPEC_PREDICATE wrapping
;; the comparison, and operand 0 receives the VSX_F-mode vector result of
;; the same comparison (operands 1 and 2 reused via match_dup).  Output is
;; the record form "xvcmpge<VSs>." (trailing dot).  Only "type" is set;
;; there is no fp_type attribute.
2077 (define_insn "*vsx_ge_<mode>_p"
2078   [(set (reg:CC CR6_REGNO)
2079         (unspec:CC
2080          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2081                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2082          UNSPEC_PREDICATE))
2083    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2084         (ge:VSX_F (match_dup 1)
2085                   (match_dup 2)))]
2086   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2087   "xvcmpge<VSs>. %x0,%x1,%x2"
2088   [(set_attr "type" "<VStype_simple>")])
2089
2090 ;; Vector select
;; Vector select for each VSX_L mode, written as an if_then_else whose
;; condition is (ne:CC operand 1, operand 4) with operand 4 required to be
;; a zero constant: the RTL yields operand 2 where the condition holds and
;; operand 3 otherwise.  Note the assembler operand order in the output
;; template: xxsel is printed with operand 3 first, then operand 2, then
;; the mask operand 1.  Enabled by VECTOR_MEM_VSX_P rather than
;; VECTOR_UNIT_VSX_P.
2091 (define_insn "*vsx_xxsel<mode>"
2092   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2093         (if_then_else:VSX_L
2094          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2095                 (match_operand:VSX_L 4 "zero_constant" ""))
2096          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2097          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2098   "VECTOR_MEM_VSX_P (<MODE>mode)"
2099   "xxsel %x0,%x3,%x2,%x1"
2100   [(set_attr "type" "vecmove")])
2101
;; Same vector select as the CC-mode xxsel pattern, except that the
;; condition is written in CCUNS mode: (ne:CCUNS operand 1, zero constant
;; operand 4) chooses operand 2, otherwise operand 3.  The output template
;; is identical: xxsel printed with operand 3, then operand 2, then the
;; mask operand 1.
2102 (define_insn "*vsx_xxsel<mode>_uns"
2103   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2104         (if_then_else:VSX_L
2105          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2106                    (match_operand:VSX_L 4 "zero_constant" ""))
2107          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2108          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2109   "VECTOR_MEM_VSX_P (<MODE>mode)"
2110   "xxsel %x0,%x3,%x2,%x1"
2111   [(set_attr "type" "vecmove")])
2112
2113 ;; Copy sign
;; Copysign for each VSX_F mode, represented as UNSPEC_COPYSIGN of operands
;; 1 and 2.  Output is xvcpsgn<VSs>; note that the assembler operands are
;; printed in the order operand 2, operand 1 -- the reverse of the RTL
;; operand order.  Two register alternatives (<VSr>, or disparaged <VSa>).
2114 (define_insn "vsx_copysign<mode>3"
2115   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2116         (unspec:VSX_F
2117          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2118           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2119          UNSPEC_COPYSIGN))]
2120   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2121   "xvcpsgn<VSs> %x0,%x2,%x1"
2122   [(set_attr "type" "<VStype_simple>")
2123    (set_attr "fp_type" "<VSfptype_simple>")])
2124
2125 ;; For the conversions, limit the register class for the integer value to be
2126 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2127 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2128 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2129 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2130 ;; in allowing virtual registers.
;; Signed integer vector to floating-point vector conversion: operand 1 has
;; the integer mode <VSI> associated with the VSX_F result mode.  Operands
;; use gpc_reg_operand rather than vsx_register_operand.  The integer input
;; uses the <VSr2>/<VSr3> register constraints; the result uses <VSr> or
;; disparaged <VSa>.  Output is xvcvsx<VSc><VSs>.
2131 (define_insn "vsx_float<VSi><mode>2"
2132   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2133         (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2134   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2135   "xvcvsx<VSc><VSs> %x0,%x1"
2136   [(set_attr "type" "<VStype_simple>")
2137    (set_attr "fp_type" "<VSfptype_simple>")])
2138
;; Unsigned integer vector to floating-point vector conversion
;; (unsigned_float): operand 1 has the integer mode <VSI> associated with
;; the VSX_F result mode.  Operands use gpc_reg_operand.  The integer input
;; uses the <VSr2>/<VSr3> register constraints; the result uses <VSr> or
;; disparaged <VSa>.  Output is xvcvux<VSc><VSs>.
2139 (define_insn "vsx_floatuns<VSi><mode>2"
2140   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2141         (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2142   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2143   "xvcvux<VSc><VSs> %x0,%x1"
2144   [(set_attr "type" "<VStype_simple>")
2145    (set_attr "fp_type" "<VSfptype_simple>")])
2146
;; Floating-point vector to signed integer vector conversion (fix): the
;; result has the integer mode <VSI> associated with the VSX_F input mode.
;; Operands use gpc_reg_operand.  The integer result uses the
;; <VSr2>/<VSr3> register constraints (second alternative disparaged by
;; '?'); the input uses <VSr> or <VSa>.  Output is
;; x<VSv>cv<VSs>sx<VSc>s.
2147 (define_insn "vsx_fix_trunc<mode><VSi>2"
2148   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2149         (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2150   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2151   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2152   [(set_attr "type" "<VStype_simple>")
2153    (set_attr "fp_type" "<VSfptype_simple>")])
2154
;; Floating-point vector to unsigned integer vector conversion
;; (unsigned_fix): the result has the integer mode <VSI> associated with
;; the VSX_F input mode.  Operands use gpc_reg_operand.  The integer result
;; uses the <VSr2>/<VSr3> register constraints (second alternative
;; disparaged by '?'); the input uses <VSr> or <VSa>.  Output is
;; x<VSv>cv<VSs>ux<VSc>s.
2155 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2156   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2157         (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2158   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2159   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2160   [(set_attr "type" "<VStype_simple>")
2161    (set_attr "fp_type" "<VSfptype_simple>")])
2162
2163 ;; Math rounding functions
;; Rounding insn over the VSX_B iterator (DF, V4SF and V2DF), represented as
;; UNSPEC_VSX_ROUND_I.  Both the pattern name and the mnemonic are built
;; from the per-mode <VSv> and <VSs> attributes: x<VSv>r<VSs>i.  Two
;; register alternatives (<VSr>, or disparaged <VSa>).
2164 (define_insn "vsx_x<VSv>r<VSs>i"
2165   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2166         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2167                       UNSPEC_VSX_ROUND_I))]
2168   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2169   "x<VSv>r<VSs>i %x0,%x1"
2170   [(set_attr "type" "<VStype_simple>")
2171    (set_attr "fp_type" "<VSfptype_simple>")])
2172
;; Rounding insn over the VSX_B iterator (DF, V4SF and V2DF), represented as
;; UNSPEC_VSX_ROUND_IC.  Both the pattern name and the mnemonic are built
;; from the per-mode <VSv> and <VSs> attributes: x<VSv>r<VSs>ic.  Two
;; register alternatives (<VSr>, or disparaged <VSa>).
2173 (define_insn "vsx_x<VSv>r<VSs>ic"
2174   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2175         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2176                       UNSPEC_VSX_ROUND_IC))]
2177   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2178   "x<VSv>r<VSs>ic %x0,%x1"
2179   [(set_attr "type" "<VStype_simple>")
2180    (set_attr "fp_type" "<VSfptype_simple>")])
2181
;; Truncation for each VSX_F mode, written as (fix:VSX_F x) where input and
;; result have the same floating-point mode.  Output is xvr<VSs>iz.  Two
;; register alternatives (<VSr>, or disparaged <VSa>).
2182 (define_insn "vsx_btrunc<mode>2"
2183   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2184         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2185   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2186   "xvr<VSs>iz %x0,%x1"
2187   [(set_attr "type" "<VStype_simple>")
2188    (set_attr "fp_type" "<VSfptype_simple>")])
2189
;; Matches UNSPEC_FRIZ over the VSX_B iterator (DF, V4SF and V2DF), so it
;; also covers the scalar DF mode that the VSX_F-based btrunc pattern does
;; not iterate over here.  Output is x<VSv>r<VSs>iz, with the scalar/vector
;; letter supplied by <VSv>.  Two register alternatives (<VSr>, or
;; disparaged <VSa>).
2190 (define_insn "*vsx_b2trunc<mode>2"
2191   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2192         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2193                       UNSPEC_FRIZ))]
2194   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2195   "x<VSv>r<VSs>iz %x0,%x1"
2196   [(set_attr "type" "<VStype_simple>")
2197    (set_attr "fp_type" "<VSfptype_simple>")])
2198
;; Floor for each VSX_F mode, represented as UNSPEC_FRIM.  Output is
;; xvr<VSs>im.  Two register alternatives (<VSr>, or disparaged <VSa>).
2199 (define_insn "vsx_floor<mode>2"
2200   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2201         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2202                       UNSPEC_FRIM))]
2203   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2204   "xvr<VSs>im %x0,%x1"
2205   [(set_attr "type" "<VStype_simple>")
2206    (set_attr "fp_type" "<VSfptype_simple>")])
2207
;; Ceiling for each VSX_F mode, represented as UNSPEC_FRIP.  Output is
;; xvr<VSs>ip.  Two register alternatives (<VSr>, or disparaged <VSa>).
2208 (define_insn "vsx_ceil<mode>2"
2209   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2210         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2211                       UNSPEC_FRIP))]
2212   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2213   "xvr<VSs>ip %x0,%x1"
2214   [(set_attr "type" "<VStype_simple>")
2215    (set_attr "fp_type" "<VSfptype_simple>")])
2216
2217 \f
2218 ;; VSX convert to/from double vector
2219
2220 ;; Convert between single and double precision
2221 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2222 ;; scalar single precision instructions internally use the double format.
2223 ;; Prefer the Altivec registers, since we likely will need to do a vperm.
;; Generic single/double precision conversion over the VSX_SPDP iterator,
;; represented as UNSPEC_VSX_CVSPDP.  Everything mode-specific comes from
;; mode attributes: the pattern name and mnemonic from <VS_spdp_insn>, the
;; result mode from <VS_spdp_res>, the scheduling type from <VS_spdp_type>,
;; and the preferred register classes from <VSr4> (result) and <VSr5>
;; (input).  The second alternative (<VSa>) is disparaged by '?'.
2224 (define_insn "vsx_<VS_spdp_insn>"
2225   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2226         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2227                               UNSPEC_VSX_CVSPDP))]
2228   "VECTOR_UNIT_VSX_P (<MODE>mode)"
2229   "<VS_spdp_insn> %x0,%x1"
2230   [(set_attr "type" "<VS_spdp_type>")])
2231
2232 ;; xscvspdp, represent the scalar SF type as V4SF
;; xscvspdp with a DF result ("ws") taken from a V4SF input ("wa"),
;; represented as UNSPEC_VSX_CVSPDP.  The enabling condition tests the
;; V4SF vector unit, i.e. the mode of the input rather than the result.
2233 (define_insn "vsx_xscvspdp"
2234   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2235         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2236                    UNSPEC_VSX_CVSPDP))]
2237   "VECTOR_UNIT_VSX_P (V4SFmode)"
2238   "xscvspdp %x0,%x1"
2239   [(set_attr "type" "fp")])
2240
2241 ;; Same as vsx_xscvspdp, but use SF as the type
;; Emits the same xscvspdp instruction as vsx_xscvspdp, but the RTL result
;; is typed SF (constraint "ww") instead of DF.  Input is a V4SF in a "wa"
;; register; represented as UNSPEC_VSX_CVSPDP.
2242 (define_insn "vsx_xscvspdp_scalar2"
2243   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2244         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2245                    UNSPEC_VSX_CVSPDP))]
2246   "VECTOR_UNIT_VSX_P (V4SFmode)"
2247   "xscvspdp %x0,%x1"
2248   [(set_attr "type" "fp")])
2249
2250 ;; Generate xvcvhpsp instruction
;; xvcvhpsp: produces a V4SF result from a V16QI input, represented as
;; UNSPEC_VSX_CVHPSP.  Both operands are in "wa" registers.  Enabled only
;; with TARGET_P9_VECTOR.
2251 (define_insn "vsx_xvcvhpsp"
2252   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2253         (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2254                      UNSPEC_VSX_CVHPSP))]
2255   "TARGET_P9_VECTOR"
2256   "xvcvhpsp %x0,%x1"
2257   [(set_attr "type" "vecfloat")])
2258
2259 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2260 ;; format of scalars is actually DF.
2261 (define_insn "vsx_xscvdpsp_scalar"
2262   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2263         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2264                      UNSPEC_VSX_CVSPDP))]
2265   "VECTOR_UNIT_VSX_P (V4SFmode)"
2266   "xscvdpsp %x0,%x1"
2267   [(set_attr "type" "fp")])
2268
2269 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
2270 (define_insn "vsx_xscvdpspn"
2271   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2272         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2273                      UNSPEC_VSX_CVDPSPN))]
2274   "TARGET_XSCVDPSPN"
2275   "xscvdpspn %x0,%x1"
2276   [(set_attr "type" "fp")])
2277
2278 (define_insn "vsx_xscvspdpn"
2279   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2280         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2281                    UNSPEC_VSX_CVSPDPN))]
2282   "TARGET_XSCVSPDPN"
2283   "xscvspdpn %x0,%x1"
2284   [(set_attr "type" "fp")])
2285
2286 (define_insn "vsx_xscvdpspn_scalar"
2287   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2288         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2289                      UNSPEC_VSX_CVDPSPN))]
2290   "TARGET_XSCVDPSPN"
2291   "xscvdpspn %x0,%x1"
2292   [(set_attr "type" "fp")])
2293
2294 ;; Used by direct move to move a SFmode value from GPR to VSX register
2295 (define_insn "vsx_xscvspdpn_directmove"
2296   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2297         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2298                    UNSPEC_VSX_CVSPDPN))]
2299   "TARGET_XSCVSPDPN"
2300   "xscvspdpn %x0,%x1"
2301   [(set_attr "type" "fp")])
2302
2303 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2304
2305 (define_expand "vsx_xvcvsxddp_scale"
2306   [(match_operand:V2DF 0 "vsx_register_operand" "")
2307    (match_operand:V2DI 1 "vsx_register_operand" "")
2308    (match_operand:QI 2 "immediate_operand" "")]
2309   "VECTOR_UNIT_VSX_P (V2DFmode)"
2310 {
2311   /* Emit the signed V2DI -> V2DF conversion, then, for a nonzero scale
2312      operand, rescale the result in place using the negated value.  */
2313   const int scale_amount = INTVAL (operands[2]);
2314   emit_insn (gen_vsx_xvcvsxddp (operands[0], operands[1]));
2315   if (scale_amount)
2316     rs6000_scale_v2df (operands[0], operands[0], -scale_amount);
2317   DONE;
2318 })
2319
2320 (define_insn "vsx_xvcvsxddp"
2321   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2322         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2323                      UNSPEC_VSX_XVCVSXDDP))]
2324   "VECTOR_UNIT_VSX_P (V2DFmode)"
2325   "xvcvsxddp %x0,%x1"
2326   [(set_attr "type" "vecdouble")])
2327
2328 (define_expand "vsx_xvcvuxddp_scale"
2329   [(match_operand:V2DF 0 "vsx_register_operand" "")
2330    (match_operand:V2DI 1 "vsx_register_operand" "")
2331    (match_operand:QI 2 "immediate_operand" "")]
2332   "VECTOR_UNIT_VSX_P (V2DFmode)"
2333 {
2334   /* Emit the unsigned V2DI -> V2DF conversion, then, for a nonzero scale
2335      operand, rescale the result in place using the negated value.  */
2336   const int scale_amount = INTVAL (operands[2]);
2337   emit_insn (gen_vsx_xvcvuxddp (operands[0], operands[1]));
2338   if (scale_amount)
2339     rs6000_scale_v2df (operands[0], operands[0], -scale_amount);
2340   DONE;
2341 })
2342
2343 (define_insn "vsx_xvcvuxddp"
2344   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2345         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2346                      UNSPEC_VSX_XVCVUXDDP))]
2347   "VECTOR_UNIT_VSX_P (V2DFmode)"
2348   "xvcvuxddp %x0,%x1"
2349   [(set_attr "type" "vecdouble")])
2350
2351 (define_expand "vsx_xvcvdpsxds_scale"
2352   [(match_operand:V2DI 0 "vsx_register_operand" "")
2353    (match_operand:V2DF 1 "vsx_register_operand" "")
2354    (match_operand:QI 2 "immediate_operand" "")]
2355   "VECTOR_UNIT_VSX_P (V2DFmode)"
2356 {
2357   /* For a nonzero scale operand, rescale the V2DF input into a fresh
2358      pseudo first; a zero scale converts the input directly.  Then emit
2359      the V2DF -> signed V2DI conversion.  */
2360   const int scale_amount = INTVAL (operands[2]);
2361   rtx cvt_src = operands[1];
2362   if (scale_amount != 0)
2363     {
2364       cvt_src = gen_reg_rtx (V2DFmode);
2365       rs6000_scale_v2df (cvt_src, operands[1], scale_amount);
2366     }
2367
2368   emit_insn (gen_vsx_xvcvdpsxds (operands[0], cvt_src));
2369   DONE;
2370 })
2371
2372 ;; convert vector of 64-bit floating point numbers to vector of
2373 ;; 64-bit signed integer
2374 (define_insn "vsx_xvcvdpsxds"
2375   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2376         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2377                      UNSPEC_VSX_XVCVDPSXDS))]
2378   "VECTOR_UNIT_VSX_P (V2DFmode)"
2379   "xvcvdpsxds %x0,%x1"
2380   [(set_attr "type" "vecdouble")])
2381
2382 ;; convert vector of 32-bit floating point numbers to vector of
2383 ;; 32-bit signed integer
2384 (define_insn "vsx_xvcvspsxws"
2385   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2386         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2387                      UNSPEC_VSX_XVCVSPSXWS))]
2388   "VECTOR_UNIT_VSX_P (V4SFmode)"
2389   "xvcvspsxws %x0,%x1"
2390   [(set_attr "type" "vecfloat")])
2391
2392 ;; Scaled conversion of a vector of 64-bit floating point numbers to a
2393 ;; vector of 64-bit unsigned integers
2394 (define_expand "vsx_xvcvdpuxds_scale"
2395   [(match_operand:V2DI 0 "vsx_register_operand" "")
2396    (match_operand:V2DF 1 "vsx_register_operand" "")
2397    (match_operand:QI 2 "immediate_operand" "")]
2398   "VECTOR_UNIT_VSX_P (V2DFmode)"
2399 {
2400   /* For a nonzero scale operand, rescale the V2DF input into a fresh
2401      pseudo first; a zero scale converts the input directly.  Then emit
2402      the V2DF -> unsigned V2DI conversion.  */
2403   const int scale_amount = INTVAL (operands[2]);
2404   rtx cvt_src = operands[1];
2405   if (scale_amount != 0)
2406     {
2407       cvt_src = gen_reg_rtx (V2DFmode);
2408       rs6000_scale_v2df (cvt_src, operands[1], scale_amount);
2409     }
2410
2411   emit_insn (gen_vsx_xvcvdpuxds (operands[0], cvt_src));
2412   DONE;
2413 })
2414
2415 ;; Convert V4SF to V4SI unsigned integers (xvcvspuxws).  NOTE(review): this
2416 ;; reuses UNSPEC_VSX_XVCVSPSXWS from the signed vsx_xvcvspsxws -- confirm.
2417 (define_insn "vsx_xvcvspuxws"
2418   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2419         (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2420                      UNSPEC_VSX_XVCVSPSXWS))]
2421   "VECTOR_UNIT_VSX_P (V4SFmode)"
2422   "xvcvspuxws %x0,%x1"
2423   [(set_attr "type" "vecfloat")])
2424
2425 (define_insn "vsx_xvcvdpuxds"
2426   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2427         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2428                      UNSPEC_VSX_XVCVDPUXDS))]
2429   "VECTOR_UNIT_VSX_P (V2DFmode)"
2430   "xvcvdpuxds %x0,%x1"
2431   [(set_attr "type" "vecdouble")])
2432
2433 ;; Convert from 64-bit to 32-bit types
2434 ;; Note, favor the Altivec registers since the usual use of these instructions
2435 ;; is in vector converts and we need to use the Altivec vperm instruction.
2436
2437 (define_insn "vsx_xvcvdpsxws"
2438   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2439         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2440                      UNSPEC_VSX_CVDPSXWS))]
2441   "VECTOR_UNIT_VSX_P (V2DFmode)"
2442   "xvcvdpsxws %x0,%x1"
2443   [(set_attr "type" "vecdouble")])
2444
2445 (define_insn "vsx_xvcvdpuxws"
2446   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2447         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2448                      UNSPEC_VSX_CVDPUXWS))]
2449   "VECTOR_UNIT_VSX_P (V2DFmode)"
2450   "xvcvdpuxws %x0,%x1"
2451   [(set_attr "type" "vecdouble")])
2452
2453 (define_insn "vsx_xvcvsxdsp"
2454   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2455         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2456                      UNSPEC_VSX_CVSXDSP))]
2457   "VECTOR_UNIT_VSX_P (V2DFmode)"
2458   "xvcvsxdsp %x0,%x1"
2459   [(set_attr "type" "vecfloat")])
2460
2461 (define_insn "vsx_xvcvuxdsp"
2462   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2463         (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2464                      UNSPEC_VSX_CVUXDSP))]
2465   "VECTOR_UNIT_VSX_P (V2DFmode)"
2466   "xvcvuxdsp %x0,%x1"
2467   [(set_attr "type" "vecdouble")])
2468
2469 (define_insn "vsx_xvcdpsp"
2470   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2471         (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2472                      UNSPEC_VSX_XVCDPSP))]
2473   "VECTOR_UNIT_VSX_P (V2DFmode)"
2474   "xvcvdpsp %x0,%x1"
2475   [(set_attr "type" "vecdouble")])
2476
2477 ;; Convert from 32-bit to 64-bit types
2478 ;; Provide both vector and scalar targets
2479 (define_insn "vsx_xvcvsxwdp"
2480   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2481         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2482                      UNSPEC_VSX_CVSXWDP))]
2483   "VECTOR_UNIT_VSX_P (V2DFmode)"
2484   "xvcvsxwdp %x0,%x1"
2485   [(set_attr "type" "vecdouble")])
2486
2487 (define_insn "vsx_xvcvsxwdp_df"
2488   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2489         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2490                    UNSPEC_VSX_CVSXWDP))]
2491   "TARGET_VSX"
2492   "xvcvsxwdp %x0,%x1"
2493   [(set_attr "type" "vecdouble")])
2494
2495 (define_insn "vsx_xvcvuxwdp"
2496   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2497         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2498                      UNSPEC_VSX_CVUXWDP))]
2499   "VECTOR_UNIT_VSX_P (V2DFmode)"
2500   "xvcvuxwdp %x0,%x1"
2501   [(set_attr "type" "vecdouble")])
2502
2503 (define_insn "vsx_xvcvuxwdp_df"
2504   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2505         (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2506                    UNSPEC_VSX_CVUXWDP))]
2507   "TARGET_VSX"
2508   "xvcvuxwdp %x0,%x1"
2509   [(set_attr "type" "vecdouble")])
2510
2511 (define_insn "vsx_xvcvspsxds"
2512   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2513         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2514                      UNSPEC_VSX_CVSPSXDS))]
2515   "VECTOR_UNIT_VSX_P (V2DFmode)"
2516   "xvcvspsxds %x0,%x1"
2517   [(set_attr "type" "vecdouble")])
2518
2519 (define_insn "vsx_xvcvspuxds"
2520   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2521         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2522                      UNSPEC_VSX_CVSPUXDS))]
2523   "VECTOR_UNIT_VSX_P (V2DFmode)"
2524   "xvcvspuxds %x0,%x1"
2525   [(set_attr "type" "vecdouble")])
2526
2527 (define_insn "vsx_xvcvsxwsp"
2528   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2529         (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2530                      UNSPEC_VSX_CVSXWSP))]
2531   "VECTOR_UNIT_VSX_P (V4SFmode)"
2532   "xvcvsxwsp %x0,%x1"
2533   [(set_attr "type" "vecfloat")])
2534
2535 (define_insn "vsx_xvcvuxwsp"
2536   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2537         (unspec:V4SF[(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2538                     UNSPEC_VSX_CVUXWSP))]
2539   "VECTOR_UNIT_VSX_P (V4SFmode)"
2540   "xvcvuxwsp %x0,%x1"
2541   [(set_attr "type" "vecfloat")])
2542
2543 ;; Generate float2 double
2544 ;; Convert two V2DF vectors into a single V4SF vector
2545 (define_expand "float2_v2df"
2546   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2547    (use (match_operand:V2DF 1 "register_operand" "wa"))
2548    (use (match_operand:V2DF 2 "register_operand" "wa"))]
2549   "VECTOR_UNIT_VSX_P (V4SFmode)"
2550 {
2551   /* All of the work is done by rs6000_generate_float2_double_code, which
2552      receives the destination followed by the two sources.  */
2553   rtx dst = operands[0];
2554   rtx src1 = operands[1];
2555   rtx src2 = operands[2];
2556
2557   rs6000_generate_float2_double_code (dst, src1, src2);
2558   DONE;
2559 })
2560
2561 ;; Generate float2
2562 ;; Convert two V2DI vectors of signed long long into a single V4SF vector
2563 (define_expand "float2_v2di"
2564   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2565    (use (match_operand:V2DI 1 "register_operand" "wa"))
2566    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2567   "VECTOR_UNIT_VSX_P (V4SFmode)"
2568 {
2569   /* All of the work is done by rs6000_generate_float2_code; true is
2570      passed as its leading argument for this signed expander.  */
2571   rtx dst = operands[0];
2572   rtx src1 = operands[1];
2573   rtx src2 = operands[2];
2574
2575   rs6000_generate_float2_code (true, dst, src1, src2);
2576   DONE;
2577 })
2578
2579 ;; Generate uns_float2
2580 ;; Convert two V2DI vectors of unsigned long long into a single V4SF vector
2581 (define_expand "uns_float2_v2di"
2582   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2583    (use (match_operand:V2DI 1 "register_operand" "wa"))
2584    (use (match_operand:V2DI 2 "register_operand" "wa"))]
2585   "VECTOR_UNIT_VSX_P (V4SFmode)"
2586 {
2587   /* The leading argument of rs6000_generate_float2_code selects a signed
2588      conversion.  This is the unsigned expander, so it must be false;
2589      passing true duplicated the signed float2_v2di expansion.  */
2590   const bool signed_convert = false;
2591
2592   rs6000_generate_float2_code (signed_convert, operands[0], operands[1],
2593                                operands[2]);
2594   DONE;
2595 })
2596
2597 ;; Generate floate
2598 ;; Convert double or signed long long to float
2599 ;; (Only even words are valid, BE numbering)
2600 (define_expand "floate<mode>"
2601   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2602    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2603   "VECTOR_UNIT_VSX_P (V4SFmode)"
2604 {
2605   if (!VECTOR_ELT_ORDER_BIG)
2606     {
2607       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2608       DONE;
2609     }
2610
2611   /* Big-endian element order: convert into a temporary and then rotate
2612      it left by one word (4 bytes) with vsldoi to put the even words in
2613      the correct location.  */
2614   rtx cvt_tmp = gen_reg_rtx (V4SFmode);
2615
2616   emit_insn (gen_vsx_xvcv<VF_sxddp>sp (cvt_tmp, operands[1]));
2617   emit_insn (gen_altivec_vsldoi_v4sf (operands[0], cvt_tmp, cvt_tmp,
2618                                       GEN_INT (4)));
2619   DONE;
2620 })
2621
2622 ;; Generate uns_floate
2623 ;; Convert unsigned long long to float
2624 ;; (Only even words are valid, BE numbering)
2625 (define_expand "unsfloatev2di"
2626   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2627    (use (match_operand:V2DI 1 "register_operand" "wa"))]
2628   "VECTOR_UNIT_VSX_P (V4SFmode)"
2629 {
2630   if (!VECTOR_ELT_ORDER_BIG)
2631     {
2632       emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2633       DONE;
2634     }
2635
2636   /* Big-endian element order: convert into a temporary and then rotate
2637      it left by one word (4 bytes) with vsldoi to put the even words in
2638      the correct location.  */
2639   rtx cvt_tmp = gen_reg_rtx (V4SFmode);
2640
2641   emit_insn (gen_vsx_xvcvuxdsp (cvt_tmp, operands[1]));
2642   emit_insn (gen_altivec_vsldoi_v4sf (operands[0], cvt_tmp, cvt_tmp,
2643                                       GEN_INT (4)));
2644   DONE;
2645 })
2646
2647 ;; Generate floato
2648 ;; Convert double or signed long long to float
2649 ;; (Only odd words are valid, BE numbering)
2650 (define_expand "floato<mode>"
2651   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2652    (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2653   "VECTOR_UNIT_VSX_P (V4SFmode)"
2654 {
2655   if (VECTOR_ELT_ORDER_BIG)
2656     {
2657       emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2658       DONE;
2659     }
2660
2661   /* Element order is not big-endian: convert into a temporary and then
2662      rotate it left by one word (4 bytes) with vsldoi to put the odd
2663      words in the correct location.  */
2664   rtx cvt_tmp = gen_reg_rtx (V4SFmode);
2665   emit_insn (gen_vsx_xvcv<VF_sxddp>sp (cvt_tmp, operands[1]));
2666   emit_insn (gen_altivec_vsldoi_v4sf (operands[0], cvt_tmp, cvt_tmp,
2667                                       GEN_INT (4)));
2668   DONE;
2669 })
2670
2671 ;; Generate uns_floato
2672 ;; Convert unsigned long long to float
2673 ;; (Only odd words are valid, BE numbering)
2674 (define_expand "unsfloatov2di"
2675   [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2676    (use (match_operand:V2DI 1 "register_operand" "wa"))]
2677   "VECTOR_UNIT_VSX_P (V4SFmode)"
2678 {
2679   if (VECTOR_ELT_ORDER_BIG)
2680     {
2681       emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2682       DONE;
2683     }
2684
2685   /* Element order is not big-endian: convert into a temporary and then
2686      rotate it left by one word (4 bytes) with vsldoi to put the odd
2687      words in the correct location.  */
2688   rtx cvt_tmp = gen_reg_rtx (V4SFmode);
2689   emit_insn (gen_vsx_xvcvuxdsp (cvt_tmp, operands[1]));
2690   emit_insn (gen_altivec_vsldoi_v4sf (operands[0], cvt_tmp, cvt_tmp,
2691                                       GEN_INT (4)));
2692   DONE;
2693 })
2694
2695 ;; Generate vsigned2
2696 ;; Convert two V2DF vectors into a single V4SI vector of signed ints
2697 (define_expand "vsigned2_v2df"
2698   [(match_operand:V4SI 0 "register_operand" "=wa")
2699    (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2700                  (match_operand:V2DF 2 "register_operand" "wa")]
2701                 UNSPEC_VSX_VSIGNED2)]
2702   "TARGET_VSX"
2703 {
2704   /* Delegate to rs6000_generate_vsigned2_code with signed_convert set,
2705      i.e. request the signed form of the conversion of the two sources
2706      into the destination.  */
2707   const bool signed_convert = true;
2708   rtx dst = operands[0];
2709   rtx src1 = operands[1];
2710   rtx src2 = operands[2];
2711   rs6000_generate_vsigned2_code (signed_convert, dst, src1, src2);
2712   DONE;
2713 })
2714
2715 ;; Generate vsignedo_v2df
2716 ;; Convert V2DF to signed ints placed in the odd words of a V4SI
2717 (define_expand "vsignedo_v2df"
2718   [(set (match_operand:V4SI 0 "register_operand" "=wa")
2719         (match_operand:V2DF 1 "register_operand" "wa"))]
2720   "TARGET_VSX"
2721 {
2722   if (!VECTOR_ELT_ORDER_BIG)
2723     {
2724       /* Little endian word numbering for operand is 3 2 1 0.
2725          Result words 3 and 1 are where they need to be.  */
2726       emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2727       DONE;
2728     }
2729
2730   /* Big endian word numbering for words in operand is 0 1 2 3.
2731      Convert into a temporary, then take (tmp tmp) and shift left three
2732      words (12 bytes):
2733      0 1 2 3    0 1 2 3  =>  3 0 1 2
2734      The values converted into words 0 and 2 end up in words 1 and 3,
2735      which is where they need to be for the result.  */
2736   rtx cvt_tmp = gen_reg_rtx (V4SImode);
2737   rtx shift_bytes = GEN_INT (12);
2738
2739   emit_insn (gen_vsx_xvcvdpsxws (cvt_tmp, operands[1]));
2740   emit_insn (gen_altivec_vsldoi_v4si (operands[0], cvt_tmp, cvt_tmp,
2741                                       shift_bytes));
2742
2743   DONE;
2744 }
2745   [(set_attr "type" "veccomplex")])
2746
2747 ;; Generate vsignede_v2df
2748 ;; Convert V2DF to signed ints placed in the even words of a V4SI
2749 (define_expand "vsignede_v2df"
2750   [(set (match_operand:V4SI 0 "register_operand" "=v")
2751         (match_operand:V2DF 1 "register_operand" "v"))]
2752   "TARGET_VSX"
2753 {
2754   if (VECTOR_ELT_ORDER_BIG)
2755     {
2756       /* Big endian: the conversion result is used as is; the result
2757          words are already where they need to be.  */
2758       emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2759       DONE;
2760     }
2761
2762   /* Little endian word numbering for operand is 3 2 1 0.
2763      Convert into a temporary, then take (tmp tmp) and shift left three
2764      words (12 bytes):
2765      0 1 2 3   0 1 2 3  =>  3 0 1 2
2766      Words 0 and 2 are now where they need to be for the result.  */
2767   rtx cvt_tmp = gen_reg_rtx (V4SImode);
2768   rtx shift_bytes = GEN_INT (12);
2769
2770   emit_insn (gen_vsx_xvcvdpsxws (cvt_tmp, operands[1]));
2771   emit_insn (gen_altivec_vsldoi_v4si (operands[0], cvt_tmp, cvt_tmp,
2772                                       shift_bytes));
2773
2774   DONE;
2775 }
2776   [(set_attr "type" "veccomplex")])
2777
2778 ;; Generate unsigned2
2779 ;; Convert two V2DF vectors into a single V4SI vector of
2780 ;; unsigned ints
2781 (define_expand "vunsigned2_v2df"
2782   [(match_operand:V4SI 0 "register_operand" "=v")
2783    (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2784                  (match_operand:V2DF 2 "register_operand" "v")]
2785                 UNSPEC_VSX_VSIGNED2)]
2786   "TARGET_VSX"
2787 {
2788   /* Delegate to rs6000_generate_vsigned2_code with signed_convert clear,
2789      i.e. request the unsigned form of the conversion of the two sources
2790      into the destination.  */
2791   const bool signed_convert = false;
2792   rtx dst = operands[0];
2793   rtx src1 = operands[1];
2794   rtx src2 = operands[2];
2795   rs6000_generate_vsigned2_code (signed_convert, dst, src1, src2);
2796   DONE;
2797 })
2798
2799 ;; Generate vunsignedo_v2df
2800 ;; Convert V2DF to unsigned ints placed in the odd words of a V4SI
2801 (define_expand "vunsignedo_v2df"
2802   [(set (match_operand:V4SI 0 "register_operand" "=v")
2803         (match_operand:V2DF 1 "register_operand" "v"))]
2804   "TARGET_VSX"
2805 {
2806   if (!VECTOR_ELT_ORDER_BIG)
2807     {
2808       /* Little endian word numbering for operand is 3 2 1 0.
2809          Result words 3 and 1 are where they need to be.  */
2810       emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2811       DONE;
2812     }
2813
2814   /* Big endian word numbering for words in operand is 0 1 2 3.
2815      Convert into a temporary, then take (tmp tmp) and shift left three
2816      words (12 bytes):
2817      0 1 2 3    0 1 2 3  =>  3 0 1 2
2818      The values converted into words 0 and 2 end up in words 1 and 3,
2819      which is where they need to be for the result.  */
2820   rtx cvt_tmp = gen_reg_rtx (V4SImode);
2821   rtx shift_bytes = GEN_INT (12);
2822
2823   emit_insn (gen_vsx_xvcvdpuxws (cvt_tmp, operands[1]));
2824   emit_insn (gen_altivec_vsldoi_v4si (operands[0], cvt_tmp, cvt_tmp,
2825                                       shift_bytes));
2826
2827   DONE;
2828 }
2829   [(set_attr "type" "veccomplex")])
2830
2831 ;; Generate vunsignede_v2df
2832 ;; Convert V2DF to unsigned ints placed in the even words of a V4SI
2833 (define_expand "vunsignede_v2df"
2834   [(set (match_operand:V4SI 0 "register_operand" "=v")
2835         (match_operand:V2DF 1 "register_operand" "v"))]
2836   "TARGET_VSX"
2837 {
2838   if (VECTOR_ELT_ORDER_BIG)
2839     {
2840       /* Big endian: the conversion result is used as is; the result
2841          words are already where they need to be.  */
2842       emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2843       DONE;
2844     }
2845
2846   /* Little endian word numbering for operand is 3 2 1 0.
2847      Convert into a temporary, then take (tmp tmp) and shift left three
2848      words (12 bytes):
2849      0 1 2 3   0 1 2 3  =>  3 0 1 2
2850      Words 0 and 2 are now where they need to be for the result.  */
2851   rtx cvt_tmp = gen_reg_rtx (V4SImode);
2852   rtx shift_bytes = GEN_INT (12);
2853
2854   emit_insn (gen_vsx_xvcvdpuxws (cvt_tmp, operands[1]));
2855   emit_insn (gen_altivec_vsldoi_v4si (operands[0], cvt_tmp, cvt_tmp,
2856                                       shift_bytes));
2857
2858   DONE;
2859 }
2860   [(set_attr "type" "veccomplex")])
2861
2862 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2863 ;; the xvrdpiz instruction does not truncate the value if the floating
2864 ;; point value is < LONG_MIN or > LONG_MAX.
2865 (define_insn "*vsx_float_fix_v2df2"
2866   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2867         (float:V2DF
2868          (fix:V2DI
2869           (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2870   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
2871    && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2872    && !flag_trapping_math && TARGET_FRIZ"
2873   "xvrdpiz %x0,%x1"
2874   [(set_attr "type" "vecdouble")
2875    (set_attr "fp_type" "fp_addsub_d")])
2876
2877 \f
2878 ;; Permute operations
2879
2880 ;; Build a V2DF/V2DI vector from two scalars
2881 (define_insn "vsx_concat_<mode>"
2882   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2883         (vec_concat:VSX_D
2884          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2885          (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2886   "VECTOR_MEM_VSX_P (<MODE>mode)"
2887 {
2888   /* Alternative 0 emits xxpermdi, alternative 1 emits mtvsrdd; the two
2889      source operands are swapped when !BYTES_BIG_ENDIAN.  */
2890   switch (which_alternative)
2891     {
2892     case 0:
2893       return (BYTES_BIG_ENDIAN ? "xxpermdi %x0,%x1,%x2,0"
2894                                : "xxpermdi %x0,%x2,%x1,0");
2895     case 1:
2896       return (BYTES_BIG_ENDIAN ? "mtvsrdd %x0,%1,%2" : "mtvsrdd %x0,%2,%1");
2897     default:
2898       gcc_unreachable ();
2899     }
2900 }
2901   [(set_attr "type" "vecperm")])
2902
2903 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2904 ;; word element in a vector register.
2905 (define_insn "*vsx_concat_<mode>_1"
2906   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2907         (vec_concat:VSX_D
2908          (vec_select:<VS_scalar>
2909           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2910           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2911          (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2912   "VECTOR_MEM_VSX_P (<MODE>mode)"
2913 {
2914   /* operands[2] picks the doubleword of operand 1; operands[4] receives
2915      the xxpermdi immediate matching that choice and the endianness.  */
2916   const HOST_WIDE_INT sel = INTVAL (operands[2]);
2917   if (!BYTES_BIG_ENDIAN)
2918     {
2919       operands[4] = GEN_INT (sel == 0);
2920       return "xxpermdi %x0,%x3,%x1,%4";
2921     }
2922
2923   operands[4] = GEN_INT (sel * 2);
2924   return "xxpermdi %x0,%x1,%x3,%4";
2925 }
2926   [(set_attr "type" "vecperm")])
2927
2928 (define_insn "*vsx_concat_<mode>_2"
2929   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2930         (vec_concat:VSX_D
2931          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2932          (vec_select:<VS_scalar>
2933           (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2934           (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2935   "VECTOR_MEM_VSX_P (<MODE>mode)"
2936 {
2937   /* operands[3] picks the doubleword of operand 2; operands[4] receives
2938      the xxpermdi immediate matching that choice and the endianness.  */
2939   const HOST_WIDE_INT sel = INTVAL (operands[3]);
2940   if (!BYTES_BIG_ENDIAN)
2941     {
2942       operands[4] = GEN_INT ((sel == 0) * 2);
2943       return "xxpermdi %x0,%x2,%x1,%4";
2944     }
2945
2946   operands[4] = GEN_INT (sel);
2947   return "xxpermdi %x0,%x1,%x2,%4";
2948 }
2949   [(set_attr "type" "vecperm")])
2950
2951 (define_insn "*vsx_concat_<mode>_3"
2952   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2953         (vec_concat:VSX_D
2954          (vec_select:<VS_scalar>
2955           (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2956           (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2957          (vec_select:<VS_scalar>
2958           (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2959           (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2960   "VECTOR_MEM_VSX_P (<MODE>mode)"
2961 {
2962   /* operands[2] and operands[4] pick the doublewords of operands 1 and 3;
2963      operands[5] receives the combined xxpermdi immediate.  */
2964   const HOST_WIDE_INT sel1 = INTVAL (operands[2]);
2965   const HOST_WIDE_INT sel3 = INTVAL (operands[4]);
2966   if (!BYTES_BIG_ENDIAN)
2967     {
2968       operands[5] = GEN_INT ((sel1 == 0) + 2 * (sel3 == 0));
2969       return "xxpermdi %x0,%x3,%x1,%5";
2970     }
2971
2972   operands[5] = GEN_INT (sel3 + 2 * sel1);
2973   return "xxpermdi %x0,%x1,%x3,%5";
2974 }
2975   [(set_attr "type" "vecperm")])
2976
2977 ;; Special purpose concat using xxpermdi to glue two single precision values
2978 ;; together, relying on the fact that internally scalar floats are represented
2979 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
2980 (define_insn "vsx_concat_v2sf"
2981   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2982         (unspec:V2DF
2983          [(match_operand:SF 1 "vsx_register_operand" "ww")
2984           (match_operand:SF 2 "vsx_register_operand" "ww")]
2985          UNSPEC_VSX_CONCAT))]
2986   "VECTOR_MEM_VSX_P (V2DFmode)"
2987 {
2988   /* The two source operands are swapped when !BYTES_BIG_ENDIAN.  */
2989   return (BYTES_BIG_ENDIAN
2990           ? "xxpermdi %x0,%x1,%x2,0"
2991           : "xxpermdi %x0,%x2,%x1,0");
2992 }
2993   [(set_attr "type" "vecperm")])
2994
2995 ;; V4SImode initialization splitter
2996 (define_insn_and_split "vsx_init_v4si"
2997   [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2998         (unspec:V4SI
2999          [(match_operand:SI 1 "reg_or_cint_operand" "rn")
3000           (match_operand:SI 2 "reg_or_cint_operand" "rn")
3001           (match_operand:SI 3 "reg_or_cint_operand" "rn")
3002           (match_operand:SI 4 "reg_or_cint_operand" "rn")]
3003          UNSPEC_VSX_VEC_INIT))
3004    (clobber (match_scratch:DI 5 "=&r"))
3005    (clobber (match_scratch:DI 6 "=&r"))]
3006    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3007    "#"
3008    "&& reload_completed"
3009    [(const_int 0)]
3010 {
3011   rs6000_split_v4si_init (operands);
3012   DONE;
3013 })
3014
3015 ;; xxpermdi for little endian loads and stores.  We need several of
3016 ;; these since the form of the PARALLEL differs by mode.
3017 (define_insn "*vsx_xxpermdi2_le_<mode>"
3018   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3019         (vec_select:VSX_D
3020           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3021           (parallel [(const_int 1) (const_int 0)])))]
3022   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3023   "xxpermdi %x0,%x1,%x1,2"
3024   [(set_attr "type" "vecperm")])
3025
3026 (define_insn "*vsx_xxpermdi4_le_<mode>"
3027   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3028         (vec_select:VSX_W
3029           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3030           (parallel [(const_int 2) (const_int 3)
3031                      (const_int 0) (const_int 1)])))]
3032   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3033   "xxpermdi %x0,%x1,%x1,2"
3034   [(set_attr "type" "vecperm")])
3035
3036 (define_insn "*vsx_xxpermdi8_le_V8HI"
3037   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3038         (vec_select:V8HI
3039           (match_operand:V8HI 1 "vsx_register_operand" "wa")
3040           (parallel [(const_int 4) (const_int 5)
3041                      (const_int 6) (const_int 7)
3042                      (const_int 0) (const_int 1)
3043                      (const_int 2) (const_int 3)])))]
3044   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
3045   "xxpermdi %x0,%x1,%x1,2"
3046   [(set_attr "type" "vecperm")])
3047
3048 (define_insn "*vsx_xxpermdi16_le_V16QI"
3049   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3050         (vec_select:V16QI
3051           (match_operand:V16QI 1 "vsx_register_operand" "wa")
3052           (parallel [(const_int 8) (const_int 9)
3053                      (const_int 10) (const_int 11)
3054                      (const_int 12) (const_int 13)
3055                      (const_int 14) (const_int 15)
3056                      (const_int 0) (const_int 1)
3057                      (const_int 2) (const_int 3)
3058                      (const_int 4) (const_int 5)
3059                      (const_int 6) (const_int 7)])))]
3060   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3061   "xxpermdi %x0,%x1,%x1,2"
3062   [(set_attr "type" "vecperm")])
3063
3064 ;; lxvd2x for little endian loads.  We need several of
3065 ;; these since the form of the PARALLEL differs by mode.
3066 (define_insn "*vsx_lxvd2x2_le_<mode>"
3067   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3068         (vec_select:VSX_D
3069           (match_operand:VSX_D 1 "memory_operand" "Z")
3070           (parallel [(const_int 1) (const_int 0)])))]
3071   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3072   "lxvd2x %x0,%y1"
3073   [(set_attr "type" "vecload")])
3074
3075 (define_insn "*vsx_lxvd2x4_le_<mode>"
3076   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3077         (vec_select:VSX_W
3078           (match_operand:VSX_W 1 "memory_operand" "Z")
3079           (parallel [(const_int 2) (const_int 3)
3080                      (const_int 0) (const_int 1)])))]
3081   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3082   "lxvd2x %x0,%y1"
3083   [(set_attr "type" "vecload")])
3084
3085 (define_insn "*vsx_lxvd2x8_le_V8HI"
     ;; V8HI: a load from memory combined with the element selector
     ;; {4..7, 0..3} is a single lxvd2x.  Only enabled for little endian when
     ;; TARGET_P9_VECTOR is off.
3086   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3087         (vec_select:V8HI
3088           (match_operand:V8HI 1 "memory_operand" "Z")
3089           (parallel [(const_int 4) (const_int 5)
3090                      (const_int 6) (const_int 7)
3091                      (const_int 0) (const_int 1)
3092                      (const_int 2) (const_int 3)])))]
3093   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3094   "lxvd2x %x0,%y1"
3095   [(set_attr "type" "vecload")])
3096
3097 (define_insn "*vsx_lxvd2x16_le_V16QI"
     ;; V16QI: a load from memory combined with the element selector
     ;; {8..15, 0..7} is a single lxvd2x.  Only enabled for little endian when
     ;; TARGET_P9_VECTOR is off.
3098   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3099         (vec_select:V16QI
3100           (match_operand:V16QI 1 "memory_operand" "Z")
3101           (parallel [(const_int 8) (const_int 9)
3102                      (const_int 10) (const_int 11)
3103                      (const_int 12) (const_int 13)
3104                      (const_int 14) (const_int 15)
3105                      (const_int 0) (const_int 1)
3106                      (const_int 2) (const_int 3)
3107                      (const_int 4) (const_int 5)
3108                      (const_int 6) (const_int 7)])))]
3109   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3110   "lxvd2x %x0,%y1"
3111   [(set_attr "type" "vecload")])
3112
3113 ;; stxvd2x for little endian stores.  We need several of
3114 ;; these since the form of the PARALLEL differs by mode.
3115 (define_insn "*vsx_stxvd2x2_le_<mode>"
     ;; V2DF/V2DI: storing a register through the element selector {1, 0} is
     ;; a single stxvd2x.  Only enabled for little endian when
     ;; TARGET_P9_VECTOR is off.
3116   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3117         (vec_select:VSX_D
3118           (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3119           (parallel [(const_int 1) (const_int 0)])))]
3120   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3121   "stxvd2x %x1,%y0"
3122   [(set_attr "type" "vecstore")])
3123
3124 (define_insn "*vsx_stxvd2x4_le_<mode>"
     ;; V4SF/V4SI: storing a register through the element selector
     ;; {2, 3, 0, 1} is a single stxvd2x.  Only enabled for little endian when
     ;; TARGET_P9_VECTOR is off.
3125   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3126         (vec_select:VSX_W
3127           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3128           (parallel [(const_int 2) (const_int 3)
3129                      (const_int 0) (const_int 1)])))]
3130   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3131   "stxvd2x %x1,%y0"
3132   [(set_attr "type" "vecstore")])
3133
3134 (define_insn "*vsx_stxvd2x8_le_V8HI"
     ;; V8HI: storing a register through the element selector {4..7, 0..3}
     ;; is a single stxvd2x.  Only enabled for little endian when
     ;; TARGET_P9_VECTOR is off.
3135   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3136         (vec_select:V8HI
3137           (match_operand:V8HI 1 "vsx_register_operand" "wa")
3138           (parallel [(const_int 4) (const_int 5)
3139                      (const_int 6) (const_int 7)
3140                      (const_int 0) (const_int 1)
3141                      (const_int 2) (const_int 3)])))]
3142   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3143   "stxvd2x %x1,%y0"
3144   [(set_attr "type" "vecstore")])
3145
3146 (define_insn "*vsx_stxvd2x16_le_V16QI"
     ;; V16QI: storing a register through the element selector
     ;; {8..15, 0..7} is a single stxvd2x.  Only enabled for little endian when
     ;; TARGET_P9_VECTOR is off.
3147   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3148         (vec_select:V16QI
3149           (match_operand:V16QI 1 "vsx_register_operand" "wa")
3150           (parallel [(const_int 8) (const_int 9)
3151                      (const_int 10) (const_int 11)
3152                      (const_int 12) (const_int 13)
3153                      (const_int 14) (const_int 15)
3154                      (const_int 0) (const_int 1)
3155                      (const_int 2) (const_int 3)
3156                      (const_int 4) (const_int 5)
3157                      (const_int 6) (const_int 7)])))]
3158   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3159   "stxvd2x %x1,%y0"
3160   [(set_attr "type" "vecstore")])
3161
3162 ;; Convert a TImode value into V1TImode
3163 (define_expand "vsx_set_v1ti"
     ;; Operands: 0 = V1TI destination, 1 = V1TI input vector, 2 = TI value to
     ;; insert, 3 = element number.  The only accepted element number is 0;
     ;; any other value aborts.  Because element 0 is the only element
     ;; accepted, the result is simply operand 2 viewed as V1TI, so operand 1
     ;; is not read.
3164   [(match_operand:V1TI 0 "nonimmediate_operand" "")
3165    (match_operand:V1TI 1 "nonimmediate_operand" "")
3166    (match_operand:TI 2 "input_operand" "")
3167    (match_operand:QI 3 "u5bit_cint_operand" "")]
3168   "VECTOR_MEM_VSX_P (V1TImode)"
3169 {
3170   if (operands[3] != const0_rtx)
3171     gcc_unreachable ();
3172
     /* Move the new TImode value (operand 2), not the old vector (operand 1),
        into the destination.  */
3173   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3174   DONE;
3175 })
3176
3177 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3178 (define_expand "vsx_set_<mode>"
     ;; Operands: 0 = destination vector, 1 = input vector, 2 = scalar value
     ;; to insert, 3 = element number (0 or 1).  The result is built by
     ;; extracting the element that is kept and concatenating it with the
     ;; new value in the requested position.
3179   [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3180    (use (match_operand:VSX_D 1 "vsx_register_operand"))
3181    (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3182    (use (match_operand:QI 3 "const_0_to_1_operand"))]
3183   "VECTOR_MEM_VSX_P (<MODE>mode)"
3184 {
3185   rtx dest = operands[0];
3186   rtx vec_reg = operands[1];
3187   rtx value = operands[2];
3188   rtx ele = operands[3];
     /* Holds the element of VEC_REG that is not being replaced.  */
3189   rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3190
3191   if (ele == const0_rtx)
3192     {
     /* Replace element 0: keep element 1, result is { value, old[1] }.  */
3193       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3194       emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3195       DONE;
3196     }
3197   else if (ele == const1_rtx)
3198     {
     /* Replace element 1: keep element 0, result is { old[0], value }.  */
3199       emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3200       emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3201       DONE;
3202     }
3203   else
3204     gcc_unreachable ();
3205 })
3206
3207 ;; Extract a DF/DI element from V2DF/V2DI
3208 ;; Optimize cases where we can do a simple or direct move.
3209 ;; Or see if we can avoid doing the move at all
3210
3211 ;; There are some unresolved problems with reload that show up if an Altivec
3212 ;; register was picked.  Limit the scalar value to FPRs for now.
3213
3214 (define_insn "vsx_extract_<mode>"
     ;; Extract element 0 or 1 (operand 2) of a V2DF/V2DI register (operand 1)
     ;; into a scalar register (operand 0).  The output code picks between no
     ;; instruction at all, mfvsrd, fmr, xxlor, mfvsrld and xxpermdi based on
     ;; the element number and the hard registers that were allocated.
3215   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
3216
3217         (vec_select:<VS_scalar>
3218          (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
3219
3220          (parallel
3221           [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
3222   "VECTOR_MEM_VSX_P (<MODE>mode)"
3223 {
3224   int element = INTVAL (operands[2]);
3225   int op0_regno = REGNO (operands[0]);
3226   int op1_regno = REGNO (operands[1]);
3227   int fldDM;
3228
3229   gcc_assert (IN_RANGE (element, 0, 1));
3230   gcc_assert (VSX_REGNO_P (op1_regno));
3231
     /* NOTE(review): VECTOR_ELEMENT_SCALAR_64BIT is presumably the element
        that already sits where a 64-bit scalar lives in the register, so
        only a register copy is needed -- confirm against its definition.  */
3232   if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3233     {
     /* Same register: emit only an assembler comment.  */
3234       if (op0_regno == op1_regno)
3235         return ASM_COMMENT_START " vec_extract to same register";
3236
3237       else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3238                && TARGET_POWERPC64)
3239         return "mfvsrd %0,%x1";
3240
3241       else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3242         return "fmr %0,%1";
3243
3244       else if (VSX_REGNO_P (op0_regno))
3245         return "xxlor %x0,%x1,%x1";
3246
3247       else
3248         gcc_unreachable ();
3249     }
3250
3251   else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3252            && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3253     return "mfvsrld %0,%x1";
3254
3255   else if (VSX_REGNO_P (op0_regno))
3256     {
     /* xxpermdi immediate: element * 2, mirrored (3 - x) on little endian.
        It is passed to the template through the extra slot operands[3].  */
3257       fldDM = element << 1;
3258       if (!BYTES_BIG_ENDIAN)
3259         fldDM = 3 - fldDM;
3260       operands[3] = GEN_INT (fldDM);
3261       return "xxpermdi %x0,%x1,%x1,%3";
3262     }
3263
3264   else
3265     gcc_unreachable ();
3266 }
3267   [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
3268
3269 ;; Optimize extracting a single scalar element from memory.
3270 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
     ;; Extract a constant element of a V2DF/V2DI value that lives in memory.
     ;; After reload this is split into a plain set of operand 0 from
     ;; operands[4], which rs6000_adjust_vec_address builds from the vector
     ;; address, the element number and the base-register scratch (operand 3).
     ;; NOTE(review): operands[4] is presumably a scalar-mode MEM for the
     ;; selected element -- confirm in rs6000_adjust_vec_address.
3271   [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3272         (vec_select:<VSX_D:VS_scalar>
3273          (match_operand:VSX_D 1 "memory_operand" "m,m")
3274          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3275    (clobber (match_scratch:P 3 "=&b,&b"))]
3276   "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3277   "#"
3278   "&& reload_completed"
3279   [(set (match_dup 0) (match_dup 4))]
3280 {
3281   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3282                                            operands[3], <VSX_D:VS_scalar>mode);
3283 }
3284   [(set_attr "type" "fpload,load")
3285    (set_attr "length" "8")])
3286
3287 ;; Optimize storing a single scalar element that is in the right location to
3288 ;; memory
3289 (define_insn "*vsx_extract_<mode>_store"
     ;; Only matches when the element number satisfies the vsx_scalar_64bit
     ;; predicate; the store is then one scalar store instruction (stfd,
     ;; stxsdx or stxsd depending on the alternative) with no permute.
3290   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3291         (vec_select:<VS_scalar>
3292          (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3293          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3294   "VECTOR_MEM_VSX_P (<MODE>mode)"
3295   "@
3296    stfd%U0%X0 %1,%0
3297    stxsd%U0x %x1,%y0
3298    stxsd %1,%0"
3299   [(set_attr "type" "fpstore")
3300    (set_attr "length" "4")])
3301
3302 ;; Variable V2DI/V2DF extract shift
3303 (define_insn "vsx_vslo_<mode>"
     ;; Emits vslo on a V2DF/V2DI source (operand 1) with the shift control in
     ;; a V2DI register (operand 2).  The operation is opaque to the RTL
     ;; optimizers (UNSPEC_VSX_VSLO) and the result is typed as the scalar
     ;; element mode.  All operands must be Altivec registers ("v").
3304   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3305         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3306                              (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3307                             UNSPEC_VSX_VSLO))]
3308   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3309   "vslo %0,%1,%2"
3310   [(set_attr "type" "vecperm")])
3311
3312 ;; Variable V2DI/V2DF extract
3313 (define_insn_and_split "vsx_extract_<mode>_var"
     ;; Extract from a V2DF/V2DI vector (register or memory, operand 1) the
     ;; element whose number is in a GPR (operand 2).  Always output as "#"
     ;; and split after reload by rs6000_split_vec_extract_var, which is
     ;; handed a DI scratch (operand 3) and a V2DI scratch (operand 4; unused,
     ;; "X", in the memory alternatives).
3314   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3315         (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3316                              (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3317                             UNSPEC_VSX_EXTRACT))
3318    (clobber (match_scratch:DI 3 "=r,&b,&b"))
3319    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3320   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3321   "#"
3322   "&& reload_completed"
3323   [(const_int 0)]
3324 {
3325   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3326                                 operands[3], operands[4]);
3327   DONE;
3328 })
3329
3330 ;; Extract a SF element from V4SF
3331 (define_insn_and_split "vsx_extract_v4sf"
     ;; Extract a constant element (operand 2) of a V4SF register as SF.
     ;; Split unconditionally ("&& 1") into an optional xxsldwi that brings
     ;; the element to position 0, followed by xscvspdp_scalar2 into the
     ;; destination.  The V4SF scratch is tied to operand 0 ("=0").
3332   [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3333         (vec_select:SF
3334          (match_operand:V4SF 1 "vsx_register_operand" "wa")
3335          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3336    (clobber (match_scratch:V4SF 3 "=0"))]
3337   "VECTOR_UNIT_VSX_P (V4SFmode)"
3338   "#"
3339   "&& 1"
3340   [(const_int 0)]
3341 {
3342   rtx op0 = operands[0];
3343   rtx op1 = operands[1];
3344   rtx op2 = operands[2];
3345   rtx op3 = operands[3];
3346   rtx tmp;
     /* Element number, mirrored (3 - n) on little endian.  */
3347   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3348
     /* Position 0 needs no shift; convert straight from the source.  */
3349   if (ele == 0)
3350     tmp = op1;
3351   else
3352     {
     /* Before register allocation the scratch has no register yet, so
        create a pseudo for the shifted vector.  */
3353       if (GET_CODE (op3) == SCRATCH)
3354         op3 = gen_reg_rtx (V4SFmode);
3355       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3356       tmp = op3;
3357     }
3358   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3359   DONE;
3360 }
3361   [(set_attr "length" "8")
3362    (set_attr "type" "fp")])
3363
3364 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
     ;; Extract a constant element (0-3) of a V4SF value that lives in memory.
     ;; After reload this is split into a plain set of operand 0 from
     ;; operands[4], which rs6000_adjust_vec_address builds for SFmode using
     ;; the base-register scratch (operand 3).
3365   [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3366         (vec_select:SF
3367          (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3368          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3369    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3370   "VECTOR_MEM_VSX_P (V4SFmode)"
3371   "#"
3372   "&& reload_completed"
3373   [(set (match_dup 0) (match_dup 4))]
3374 {
3375   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3376                                            operands[3], SFmode);
3377 }
3378   [(set_attr "type" "fpload,fpload,fpload,load")
3379    (set_attr "length" "8")])
3380
3381 ;; Variable V4SF extract
3382 (define_insn_and_split "vsx_extract_v4sf_var"
     ;; Extract from a V4SF vector (register or memory, operand 1) the element
     ;; whose number is in a GPR (operand 2).  Always output as "#" and split
     ;; after reload by rs6000_split_vec_extract_var, which is handed a DI
     ;; scratch (operand 3) and a V2DI scratch (operand 4; unused, "X", in the
     ;; memory alternatives).
3383   [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3384         (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3385                     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3386                    UNSPEC_VSX_EXTRACT))
3387    (clobber (match_scratch:DI 3 "=r,&b,&b"))
3388    (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3389   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3390   "#"
3391   "&& reload_completed"
3392   [(const_int 0)]
3393 {
3394   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3395                                 operands[3], operands[4]);
3396   DONE;
3397 })
3398
3399 ;; Expand the builtin form of xxpermdi to canonical rtl.
3400 (define_expand "vsx_xxpermdi_<mode>"
     ;; Operands: 0 = destination, 1 and 2 = input vectors, 3 = 2-bit mask.
     ;; The mask is turned into the two selectors taken by
     ;; vsx_xxpermdi2_<mode>_1; modes other than V2DF/V2DI are handled by
     ;; viewing all three vectors as V2DI.
3401   [(match_operand:VSX_L 0 "vsx_register_operand")
3402    (match_operand:VSX_L 1 "vsx_register_operand")
3403    (match_operand:VSX_L 2 "vsx_register_operand")
3404    (match_operand:QI 3 "u5bit_cint_operand")]
3405   "VECTOR_MEM_VSX_P (<MODE>mode)"
3406 {
3407   rtx target = operands[0];
3408   rtx op0 = operands[1];
3409   rtx op1 = operands[2];
3410   int mask = INTVAL (operands[3]);
     /* Mask bit 1 gives the first selector (0 or 1); mask bit 0 gives the
        second selector, rebased to 2 or 3.  */
3411   rtx perm0 = GEN_INT ((mask >> 1) & 1);
3412   rtx perm1 = GEN_INT ((mask & 1) + 2);
3413   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3414
3415   if (<MODE>mode == V2DFmode)
3416     gen = gen_vsx_xxpermdi2_v2df_1;
3417   else
3418     {
3419       gen = gen_vsx_xxpermdi2_v2di_1;
3420       if (<MODE>mode != V2DImode)
3421         {
3422           target = gen_lowpart (V2DImode, target);
3423           op0 = gen_lowpart (V2DImode, op0);
3424           op1 = gen_lowpart (V2DImode, op1);
3425         }
3426     }
3427   emit_insn (gen (target, op0, op1, perm0, perm1));
3428   DONE;
3429 })
3430
3431 ;; Special version of xxpermdi that retains big-endian semantics.
3432 (define_expand "vsx_xxpermdi_<mode>_be"
     ;; Same operands as vsx_xxpermdi_<mode>.  On big endian the expansion is
     ;; identical; on little endian the inputs are exchanged and the selectors
     ;; are swapped and replaced by 3 - selector before the insn is generated.
3433   [(match_operand:VSX_L 0 "vsx_register_operand")
3434    (match_operand:VSX_L 1 "vsx_register_operand")
3435    (match_operand:VSX_L 2 "vsx_register_operand")
3436    (match_operand:QI 3 "u5bit_cint_operand")]
3437   "VECTOR_MEM_VSX_P (<MODE>mode)"
3438 {
3439   rtx target = operands[0];
3440   rtx op0 = operands[1];
3441   rtx op1 = operands[2];
3442   int mask = INTVAL (operands[3]);
     /* Mask bit 1 gives the first selector (0 or 1); mask bit 0 gives the
        second selector, rebased to 2 or 3.  */
3443   rtx perm0 = GEN_INT ((mask >> 1) & 1);
3444   rtx perm1 = GEN_INT ((mask & 1) + 2);
3445   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3446
3447   if (<MODE>mode == V2DFmode)
3448     gen = gen_vsx_xxpermdi2_v2df_1;
3449   else
3450     {
3451       gen = gen_vsx_xxpermdi2_v2di_1;
3452       if (<MODE>mode != V2DImode)
3453         {
3454           target = gen_lowpart (V2DImode, target);
3455           op0 = gen_lowpart (V2DImode, op0);
3456           op1 = gen_lowpart (V2DImode, op1);
3457         }
3458     }
3459   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3460      transformation we don't want; it is necessary for
3461      rs6000_expand_vec_perm_const_1 but not for this use.  So we
3462      prepare for that by reversing the transformation here.  */
3463   if (BYTES_BIG_ENDIAN)
3464     emit_insn (gen (target, op0, op1, perm0, perm1));
3465   else
3466     {
3467       rtx p0 = GEN_INT (3 - INTVAL (perm1));
3468       rtx p1 = GEN_INT (3 - INTVAL (perm0));
3469       emit_insn (gen (target, op1, op0, p0, p1));
3470     }
3471   DONE;
3472 })
3473
3474 (define_insn "vsx_xxpermdi2_<mode>_1"
     ;; Select one element from each of two V2DF/V2DI registers: operand 3
     ;; (0 or 1) indexes into operand 1 and operand 4 (2 or 3) indexes into
     ;; operand 2 within the concatenated vector.  Output is one xxpermdi
     ;; whose immediate is computed below.
3475   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3476         (vec_select:VSX_D
3477           (vec_concat:<VS_double>
3478             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3479             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3480           (parallel [(match_operand 3 "const_0_to_1_operand" "")
3481                      (match_operand 4 "const_2_to_3_operand" "")])))]
3482   "VECTOR_MEM_VSX_P (<MODE>mode)"
3483 {
3484   int op3, op4, mask;
3485
3486   /* For little endian, swap operands and invert/swap selectors
3487      to get the correct xxpermdi.  The operand swap sets up the
3488      inputs as a little endian array.  The selectors are swapped
3489      because they are defined to use big endian ordering.  The
3490      selectors are inverted to get the correct doublewords for
3491      little endian ordering.  */
3492   if (BYTES_BIG_ENDIAN)
3493     {
3494       op3 = INTVAL (operands[3]);
3495       op4 = INTVAL (operands[4]);
3496     }
3497   else
3498     {
3499       op3 = 3 - INTVAL (operands[4]);
3500       op4 = 3 - INTVAL (operands[3]);
3501     }
3502
     /* High mask bit comes from the first selector, low mask bit from the
        second selector rebased from 2..3 to 0..1.  operands[3] is reused to
        carry the immediate into the template.  */
3503   mask = (op3 << 1) | (op4 - 2);
3504   operands[3] = GEN_INT (mask);
3505
3506   if (BYTES_BIG_ENDIAN)
3507     return "xxpermdi %x0,%x1,%x2,%3";
3508   else
3509     return "xxpermdi %x0,%x2,%x1,%3";
3510 }
3511   [(set_attr "type" "vecperm")])
3512
3513 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
3514 ;; none of the small types were allowed in a vector register, so we had to
3515 ;; extract to a DImode and either do a direct move or store.
3516 (define_expand  "vsx_extract_<mode>"
     ;; Operands: 0 = scalar destination, 1 = small-integer vector, 2 =
     ;; constant element number.  With TARGET_P9_VECTOR the _p9 insn is
     ;; generated explicitly; otherwise the preparation code falls through
     ;; and the RTL template below (extract plus a vector-mode scratch) is
     ;; emitted as written.
3517   [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3518                    (vec_select:<VS_scalar>
3519                     (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3520                     (parallel [(match_operand:QI 2 "const_int_operand")])))
3521               (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3522   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3523 {
3524   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
3525   if (TARGET_P9_VECTOR)
3526     {
3527       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3528                                             operands[2]));
3529       DONE;
3530     }
3531 })
3532
3533 (define_insn "vsx_extract_<mode>_p9"
     ;; Constant-element extract when TARGET_VEXTRACTUB is set.  Alternative 0
     ;; (GPR destination, SI scratch in operand 3) is output as "#" and
     ;; handled by the define_split for GPR destinations; alternative 1
     ;; emits xxextractuw or vextractu<wd> with a byte offset.
3534   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3535         (vec_select:<VS_scalar>
3536          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3537          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3538    (clobber (match_scratch:SI 3 "=r,X"))]
3539   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3540 {
3541   if (which_alternative == 0)
3542     return "#";
3543
3544   else
3545     {
3546       HOST_WIDE_INT elt = INTVAL (operands[2]);
     /* Mirror the element number when element order is not big endian.  */
3547       HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
3548                                ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3549                                : elt);
3550
3551       HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3552       HOST_WIDE_INT offset = unit_size * elt_adj;
3553
     /* The instruction takes a byte offset, so operand 2 is rewritten from
        an element number to element size * adjusted element number.  */
3554       operands[2] = GEN_INT (offset);
3555       if (unit_size == 4)
3556         return "xxextractuw %x0,%x1,%2";
3557       else
3558         return "vextractu<wd> %0,%1,%2";
3559     }
3560 }
3561   [(set_attr "type" "vecsimple")])
3562
3563 (define_split
     ;; After reload, extract from an Altivec register into a GPR: load the
     ;; byte offset of the element into the GPR scratch (operand 3) and emit
     ;; vextu<wd>lx or vextu<wd>rx, chosen by VECTOR_ELT_ORDER_BIG, writing
     ;; the destination register viewed as SImode.
3564   [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3565         (vec_select:<VS_scalar>
3566          (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3567          (parallel [(match_operand:QI 2 "const_int_operand")])))
3568    (clobber (match_operand:SI 3 "int_reg_operand"))]
3569   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3570   [(const_int 0)]
3571 {
3572   rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3573   rtx op1 = operands[1];
3574   rtx op2 = operands[2];
3575   rtx op3 = operands[3];
     /* Unlike the vector-register alternative, the element number is not
        mirrored here; the lx/rx choice below depends on element order.  */
3576   HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3577
3578   emit_move_insn (op3, GEN_INT (offset));
3579   if (VECTOR_ELT_ORDER_BIG)
3580     emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3581   else
3582     emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3583   DONE;
3584 })
3585
3586 ;; Optimize zero extracts to eliminate the AND after the extract.
3587 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
     ;; Matches a constant-element extract that is zero extended to DImode.
     ;; After reload it is rewritten as the plain extract (same shape as
     ;; vsx_extract_<mode>_p9) into operands[4], which is the destination
     ;; hard register viewed in the scalar element mode; no separate
     ;; zero-extension instruction is generated.
3588   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3589         (zero_extend:DI
3590          (vec_select:<VS_scalar>
3591           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3592           (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3593    (clobber (match_scratch:SI 3 "=r,X"))]
3594   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3595   "#"
3596   "&& reload_completed"
3597   [(parallel [(set (match_dup 4)
3598                    (vec_select:<VS_scalar>
3599                     (match_dup 1)
3600                     (parallel [(match_dup 2)])))
3601               (clobber (match_dup 3))])]
3602 {
3603   operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3604 })
3605
3606 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3607 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
     ;; Store a constant element of a small-integer vector to memory.  After
     ;; reload this is split into two insns: the extract into the scalar
     ;; scratch (operand 3; a vector register in alternative 0, a GPR in
     ;; alternative 1, which also needs the SI scratch in operand 4), then a
     ;; normal store of that scratch to operand 0.
3608   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3609         (vec_select:<VS_scalar>
3610          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3611          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3612    (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3613    (clobber (match_scratch:SI 4 "=X,&r"))]
3614   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3615   "#"
3616   "&& reload_completed"
3617   [(parallel [(set (match_dup 3)
3618                    (vec_select:<VS_scalar>
3619                     (match_dup 1)
3620                     (parallel [(match_dup 2)])))
3621               (clobber (match_dup 4))])
3622    (set (match_dup 0)
3623         (match_dup 3))])
3624
3625 (define_insn_and_split  "*vsx_extract_si"
     ;; Constant-element extract from V4SI when TARGET_P9_VECTOR is off.  The
     ;; destination may be a GPR, a vector register or memory.  After reload
     ;; the element is splatted into the vector scratch (unless it is already
     ;; in word 1) and then moved or stored from that register.
3626   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3627         (vec_select:SI
3628          (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3629          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3630    (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3631   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3632   "#"
3633   "&& reload_completed"
3634   [(const_int 0)]
3635 {
3636   rtx dest = operands[0];
3637   rtx src = operands[1];
3638   rtx element = operands[2];
3639   rtx vec_tmp = operands[3];
3640   int value;
3641
     /* Mirror the element number when element order is not big endian.  */
3642   if (!VECTOR_ELT_ORDER_BIG)
3643     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3644
3645   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3646      instruction.  */
     /* NOTE(review): word 1 is presumably where an SImode scalar lives in
        the register -- confirm.  */
3647   value = INTVAL (element);
3648   if (value != 1)
3649     emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3650   else
3651     vec_tmp = src;
3652
3653   if (MEM_P (operands[0]))
3654     {
3655       if (can_create_pseudo_p ())
3656         dest = rs6000_address_for_fpconvert (dest);
3657
     /* With TARGET_P8_VECTOR store the register as SImode; otherwise use
        stfiwx on the register viewed as DImode.  */
3658       if (TARGET_P8_VECTOR)
3659         emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3660       else
3661         emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3662     }
3663
3664   else if (TARGET_P8_VECTOR)
3665     emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3666   else
     /* Without TARGET_P8_VECTOR both registers are moved as DImode.  */
3667     emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3668                     gen_rtx_REG (DImode, REGNO (vec_tmp)));
3669
3670   DONE;
3671 }
3672   [(set_attr "type" "mftgpr,vecperm,fpstore")
3673    (set_attr "length" "8")])
3674
3675 (define_insn_and_split  "*vsx_extract_<mode>_p8"
     ;; Constant-element extract from V16QI/V8HI into a GPR when
     ;; TARGET_P9_VECTOR is off.  After reload the element is splatted into
     ;; the vector scratch (unless it is already byte 7 / halfword 3) and the
     ;; register is then moved to the destination GPR as DImode.
3676   [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3677         (vec_select:<VS_scalar>
3678          (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3679          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3680    (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3681   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3682    && !TARGET_P9_VECTOR"
3683   "#"
3684   "&& reload_completed"
3685   [(const_int 0)]
3686 {
3687   rtx dest = operands[0];
3688   rtx src = operands[1];
3689   rtx element = operands[2];
3690   rtx vec_tmp = operands[3];
3691   int value;
3692
     /* Mirror the element number when element order is not big endian.  */
3693   if (!VECTOR_ELT_ORDER_BIG)
3694     element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3695
3696   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3697      instruction.  */
     /* NOTE(review): byte 7 and halfword 3 are presumably the low-order
        end of the doubleword that the DImode move below transfers --
        confirm.  */
3698   value = INTVAL (element);
3699   if (<MODE>mode == V16QImode)
3700     {
3701       if (value != 7)
3702         emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3703       else
3704         vec_tmp = src;
3705     }
3706   else if (<MODE>mode == V8HImode)
3707     {
3708       if (value != 3)
3709         emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3710       else
3711         vec_tmp = src;
3712     }
3713   else
3714     gcc_unreachable ();
3715
3716   emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3717                   gen_rtx_REG (DImode, REGNO (vec_tmp)));
3718   DONE;
3719 }
3720   [(set_attr "type" "mftgpr")])
3721
3722 ;; Optimize extracting a single scalar element from memory.
3723 (define_insn_and_split "*vsx_extract_<mode>_load"
     ;; Extract a constant element of a small-integer vector that lives in
     ;; memory into a GPR.  After reload this is split into a plain set of
     ;; operand 0 from operands[4], which rs6000_adjust_vec_address builds for
     ;; the scalar element mode using the base-register scratch (operand 3).
3724   [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3725         (vec_select:<VS_scalar>
3726          (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3727          (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3728    (clobber (match_scratch:DI 3 "=&b"))]
3729   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3730   "#"
3731   "&& reload_completed"
3732   [(set (match_dup 0) (match_dup 4))]
3733 {
3734   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3735                                            operands[3], <VS_scalar>mode);
3736 }
3737   [(set_attr "type" "load")
3738    (set_attr "length" "8")])
3739
3740 ;; Variable V16QI/V8HI/V4SI extract
3741 (define_insn_and_split "vsx_extract_<mode>_var"
     ;; Extract into a GPR the element of a small-integer vector (register or
     ;; memory, operand 1) whose number is in a GPR (operand 2).  Always
     ;; output as "#" and split after reload by rs6000_split_vec_extract_var,
     ;; which is handed a DI scratch (operand 3) and a V2DI scratch (operand
     ;; 4; only a real register in the second alternative).
3742   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3743         (unspec:<VS_scalar>
3744          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3745           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3746          UNSPEC_VSX_EXTRACT))
3747    (clobber (match_scratch:DI 3 "=r,r,&b"))
3748    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3749   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3750   "#"
3751   "&& reload_completed"
3752   [(const_int 0)]
3753 {
3754   rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3755                                 operands[3], operands[4]);
3756   DONE;
3757 })
3758
3759 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
     ;; Variable-element extract from a small-integer vector whose result is
     ;; zero extended to SImode/DImode.  Always output as "#" and split after
     ;; reload by rs6000_split_vec_extract_var; the zero_extend is dropped by
     ;; passing the destination hard register re-typed to the scalar element
     ;; mode, which is the mode of the unspec result.
3760   [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3761         (zero_extend:SDI
3762          (unspec:<VSX_EXTRACT_I:VS_scalar>
3763           [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3764            (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3765           UNSPEC_VSX_EXTRACT)))
3766    (clobber (match_scratch:DI 3 "=r,r,&b"))
3767    (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3768   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3769   "#"
3770   "&& reload_completed"
3771   [(const_int 0)]
3772 {
     /* The destination must be the scalar element mode, not the vector
        mode: operand 0 is a GPR and the unspec yields a single element.  */
3773   machine_mode smode = <VSX_EXTRACT_I:VS_scalar>mode;
3774   rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3775                                 operands[1], operands[2],
3776                                 operands[3], operands[4]);
3777   DONE;
3778 })
3779
3780 ;; VSX_EXTRACT optimizations
3781 ;; Optimize double d = (double) vec_extract (vi, <n>)
3782 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3783 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
     ;; Matches a signed or unsigned int-to-double conversion of a constant
     ;; V4SI element.  Split unconditionally ("&& 1") into an optional vspltw
     ;; that brings the element to word 0, followed by the xvcvsxwdp/xvcvuxwdp
     ;; DF conversion pattern.
3784   [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3785         (any_float:DF
3786          (vec_select:SI
3787           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3788           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3789    (clobber (match_scratch:V4SI 3 "=v"))]
3790   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3791   "#"
3792   "&& 1"
3793   [(const_int 0)]
3794 {
3795   rtx dest = operands[0];
3796   rtx src = operands[1];
3797   rtx element = operands[2];
3798   rtx v4si_tmp = operands[3];
3799   int value;
3800
     /* Mirror the element number when element order is not big endian.  */
3801   if (!VECTOR_ELT_ORDER_BIG)
3802     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3803
3804   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3805      instruction.  */
3806   value = INTVAL (element);
3807   if (value != 0)
3808     {
     /* Before register allocation the scratch has no register yet, so
        create a pseudo for the splat result.  */
3809       if (GET_CODE (v4si_tmp) == SCRATCH)
3810         v4si_tmp = gen_reg_rtx (V4SImode);
3811       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3812     }
3813   else
3814     v4si_tmp = src;
3815
3816   emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3817   DONE;
3818 })
3819
3820 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3821 ;; where <type> is a floating point type that is supported by the hardware that is
3822 ;; not double.  First convert the value to double, and then to the desired
3823 ;; type.
3824 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
     ;; Same as the DF variant, but for the other floating point modes in
     ;; VSX_EXTRACT_FL: the element is first converted to a DF temporary
     ;; (operand 4) and that is then truncated to SF or extended to the
     ;; 128-bit mode with the pattern matching the destination mode.
3825   [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3826         (any_float:VSX_EXTRACT_FL
3827          (vec_select:SI
3828           (match_operand:V4SI 1 "gpc_reg_operand" "v")
3829           (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3830    (clobber (match_scratch:V4SI 3 "=v"))
3831    (clobber (match_scratch:DF 4 "=ws"))]
3832   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3833   "#"
3834   "&& 1"
3835   [(const_int 0)]
3836 {
3837   rtx dest = operands[0];
3838   rtx src = operands[1];
3839   rtx element = operands[2];
3840   rtx v4si_tmp = operands[3];
3841   rtx df_tmp = operands[4];
3842   int value;
3843
     /* Mirror the element number when element order is not big endian.  */
3844   if (!VECTOR_ELT_ORDER_BIG)
3845     element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3846
3847   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3848      instruction.  */
3849   value = INTVAL (element);
3850   if (value != 0)
3851     {
3852       if (GET_CODE (v4si_tmp) == SCRATCH)
3853         v4si_tmp = gen_reg_rtx (V4SImode);
3854       emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3855     }
3856   else
3857     v4si_tmp = src;
3858
     /* Before register allocation the DF scratch has no register yet.  */
3859   if (GET_CODE (df_tmp) == SCRATCH)
3860     df_tmp = gen_reg_rtx (DFmode);
3861
3862   emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3863
     /* Second step: DF to the requested mode.  Any mode/format combination
        not listed here aborts.  */
3864   if (<MODE>mode == SFmode)
3865     emit_insn (gen_truncdfsf2 (dest, df_tmp));
3866   else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3867     emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3868   else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3869            && TARGET_FLOAT128_HW)
3870     emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3871   else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3872     emit_insn (gen_extenddfif2 (dest, df_tmp));
3873   else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3874     emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3875   else
3876     gcc_unreachable ();
3877
3878   DONE;
3879 })
3880
3881 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3882 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3883 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3884 ;; vector short or vector unsigned short.
3885 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
     ;; Signed conversion of a constant vector element to floating point when
     ;; TARGET_P9_VECTOR is set.  After reload it becomes three insns: extract
     ;; the element into the Altivec scratch (operand 3), sign extend it to
     ;; DImode in the same register (operands[4]), then convert that DImode
     ;; value to the floating point mode.
3886   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3887         (float:FL_CONV
3888          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3889           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3890           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3891    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3892   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3893    && TARGET_P9_VECTOR"
3894   "#"
3895   "&& reload_completed"
3896   [(parallel [(set (match_dup 3)
3897                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3898                     (match_dup 1)
3899                     (parallel [(match_dup 2)])))
3900               (clobber (scratch:SI))])
3901    (set (match_dup 4)
3902         (sign_extend:DI (match_dup 3)))
3903    (set (match_dup 0)
3904         (float:<FL_CONV:MODE> (match_dup 4)))]
3905 {
     /* operands[4] is the scratch register of operand 3 viewed as DImode.  */
3906   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3907 })
3908
3909 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
     ;; Unsigned counterpart of the signed extract-and-convert pattern.  The
     ;; post-reload split extracts the element into scratch operand 3 and then
     ;; converts the DImode view of that register (operand 4) directly; there
     ;; is no separate extension step.
     ;; NOTE(review): the split uses float rather than unsigned_float, which
     ;; presumably relies on the extract leaving the upper bits zero -- confirm.
3910   [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3911         (unsigned_float:FL_CONV
3912          (vec_select:<VSX_EXTRACT_I:VS_scalar>
3913           (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3914           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3915    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3916   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3917    && TARGET_P9_VECTOR"
3918   "#"
3919   "&& reload_completed"
3920   [(parallel [(set (match_dup 3)
3921                    (vec_select:<VSX_EXTRACT_I:VS_scalar>
3922                     (match_dup 1)
3923                     (parallel [(match_dup 2)])))
3924               (clobber (scratch:SI))])
3925    (set (match_dup 0)
3926         (float:<FL_CONV:MODE> (match_dup 4)))]
3927 {
       /* Operand 4 is the hard register allocated to scratch operand 3,
          re-expressed in DImode.  */
3928   operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3929 })
3930
3931 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
     ;; Operand 1 is tied to the destination, operand 2 is the scalar to insert
     ;; and operand 3 is the element number.  The output code rewrites operand 3
     ;; into a byte offset before printing the instruction.
3932 (define_insn "vsx_set_<mode>_p9"
3933   [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3934         (unspec:VSX_EXTRACT_I
3935          [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3936           (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3937           (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3938          UNSPEC_VSX_SET))]
3939   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3940 {
       int nunits = GET_MODE_NUNITS (<MODE>mode);
       int slot = INTVAL (operands[3]);

       /* With little endian element order the element index is mirrored.  */
       if (!VECTOR_ELT_ORDER_BIG)
         slot = nunits - 1 - slot;

       /* Scale the element index by the scalar size to get a byte offset.  */
       operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * slot);

       /* V4SI uses the VSX word insert; the other modes use vinsert<wd>.  */
       return (<MODE>mode == V4SImode
               ? "xxinsertw %x0,%x2,%3"
               : "vinsert<wd> %0,%2,%3");
3952 }
3953   [(set_attr "type" "vecperm")])
3954
3955 (define_insn_and_split "vsx_set_v4sf_p9"
     ;; Set element operand 3 of a V4SF vector (operand 1, tied to operand 0)
     ;; from the SF register operand 2, using SI scratch operand 4.  After
     ;; reload this splits into three insns:
     ;;   1. UNSPEC_VSX_CVDPSPN of operand 2 into the scratch (as V4SF);
     ;;   2. extract word 1 (big endian element order) or word 2 (little
     ;;      endian) of the scratch (as V4SI) back into the scratch (as SI);
     ;;   3. a V4SI UNSPEC_VSX_SET of that word into the destination.
3956   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3957         (unspec:V4SF
3958          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3959           (match_operand:SF 2 "gpc_reg_operand" "ww")
3960           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3961          UNSPEC_VSX_SET))
3962    (clobber (match_scratch:SI 4 "=&wJwK"))]
3963   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3964   "#"
3965   "&& reload_completed"
3966   [(set (match_dup 5)
3967         (unspec:V4SF [(match_dup 2)]
3968                      UNSPEC_VSX_CVDPSPN))
3969    (parallel [(set (match_dup 4)
3970                    (vec_select:SI (match_dup 6)
3971                                   (parallel [(match_dup 7)])))
3972               (clobber (scratch:SI))])
3973    (set (match_dup 8)
3974         (unspec:V4SI [(match_dup 8)
3975                       (match_dup 4)
3976                       (match_dup 3)]
3977                      UNSPEC_VSX_SET))]
3978 {
3979   unsigned int tmp_regno = reg_or_subregno (operands[4]);
3980
       /* Operands 5 and 6 are the scratch register viewed as V4SF and V4SI;
          operand 7 is the word to extract; operand 8 is the destination
          register viewed as V4SI.  */
3981   operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3982   operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3983   operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
3984   operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3985 }
3986   [(set_attr "type" "vecperm")
3987    (set_attr "length" "12")])
3988
3989 ;; Special case setting 0.0f to a V4SF element
     ;; After reload this splits into two insns: set SI scratch operand 4 to
     ;; zero, then insert it at element operand 3 with a V4SI UNSPEC_VSX_SET on
     ;; the destination register.
3990 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3991   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3992         (unspec:V4SF
3993          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3994           (match_operand:SF 2 "zero_fp_constant" "j")
3995           (match_operand:QI 3 "const_0_to_3_operand" "n")]
3996          UNSPEC_VSX_SET))
3997    (clobber (match_scratch:SI 4 "=&wJwK"))]
3998   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3999   "#"
4000   "&& reload_completed"
4001   [(set (match_dup 4)
4002         (const_int 0))
4003    (set (match_dup 5)
4004         (unspec:V4SI [(match_dup 5)
4005                       (match_dup 4)
4006                       (match_dup 3)]
4007                      UNSPEC_VSX_SET))]
4008 {
       /* Operand 5 is the destination register viewed as V4SI.  */
4009   operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4010 }
4011   [(set_attr "type" "vecperm")
4012    (set_attr "length" "8")])
4013
4014 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4015 ;; that is in the default scalar position (1 for big endian, 2 for little
4016 ;; endian).  We just need to do an xxinsertw since the element is in the
4017 ;; correct location.
     ;; Operand 4 (the destination element number m) is rewritten into a byte
     ;; offset by the output code before the instruction is printed.
4018
4019 (define_insn "*vsx_insert_extract_v4sf_p9"
4020   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4021         (unspec:V4SF
4022          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4023           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4024                          (parallel
4025                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4026           (match_operand:QI 4 "const_0_to_3_operand" "n")]
4027          UNSPEC_VSX_SET))]
4028   "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4029    && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4030 {
       int slot = INTVAL (operands[4]);

       /* With little endian element order the element index is mirrored.  */
       if (!VECTOR_ELT_ORDER_BIG)
         {
           int last = GET_MODE_NUNITS (V4SFmode) - 1;
           slot = last - slot;
         }

       /* Scale the element index by the SFmode size to get a byte offset.  */
       operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * slot);
       return "xxinsertw %x0,%x2,%4";
4038 }
4039   [(set_attr "type" "vecperm")])
4040
4041 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4042 ;; that is in the default scalar position (1 for big endian, 2 for little
4043 ;; endian).  Convert the insert/extract to int and avoid doing the conversion.
     ;; The split is unconditional ("&& 1"): it extracts word n of operand 2
     ;; (viewed as V4SI) into SI operand 5, then inserts operand 5 at element m
     ;; with a V4SI UNSPEC_VSX_SET.
4044
4045 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4046   [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4047         (unspec:V4SF
4048          [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4049           (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4050                          (parallel
4051                           [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4052           (match_operand:QI 4 "const_0_to_3_operand" "n")]
4053          UNSPEC_VSX_SET))
4054    (clobber (match_scratch:SI 5 "=&wJwK"))]
4055   "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4056    && TARGET_P9_VECTOR && TARGET_POWERPC64
4057    && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4058   "#"
4059   "&& 1"
4060   [(parallel [(set (match_dup 5)
4061                    (vec_select:SI (match_dup 6)
4062                                   (parallel [(match_dup 3)])))
4063               (clobber (scratch:SI))])
4064    (set (match_dup 7)
4065         (unspec:V4SI [(match_dup 8)
4066                       (match_dup 5)
4067                       (match_dup 4)]
4068                      UNSPEC_VSX_SET))]
4069 {
       /* If operand 5 is still a SCRATCH rtx, replace it with a new SImode
          pseudo register.  */
4070   if (GET_CODE (operands[5]) == SCRATCH)
4071     operands[5] = gen_reg_rtx (SImode);
4072
       /* Operands 6, 7 and 8 are the V4SI views of the source vector, the
          destination and the vector being inserted into.  */
4073   operands[6] = gen_lowpart (V4SImode, operands[2]);
4074   operands[7] = gen_lowpart (V4SImode, operands[0]);
4075   operands[8] = gen_lowpart (V4SImode, operands[1]);
4076 }
4077   [(set_attr "type" "vecperm")])
4078
4079 ;; Expanders for builtins
4080 (define_expand "vsx_mergel_<mode>"
     ;; Emit operand 0 = vec_select of the concatenation of operands 1 and 2.
     ;; Normally elements 1 and 3 of (op1, op2) are selected; for little endian
     ;; with big endian element order the inputs are swapped and elements 0 and
     ;; 2 are selected instead.
4081   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
4082    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
4083    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
4084   "VECTOR_MEM_VSX_P (<MODE>mode)"
4085 {
       /* Special handling for LE with -maltivec=be.  */
       const bool swap_p = !BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG;

       rtvec sel = (swap_p
                    ? gen_rtvec (2, GEN_INT (0), GEN_INT (2))
                    : gen_rtvec (2, GEN_INT (1), GEN_INT (3)));
       rtx pair = (swap_p
                   ? gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1])
                   : gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]));
       rtx merged = gen_rtx_VEC_SELECT (<MODE>mode, pair,
                                        gen_rtx_PARALLEL (VOIDmode, sel));

       emit_insn (gen_rtx_SET (operands[0], merged));
       DONE;
4104 })
4105
4106 (define_expand "vsx_mergeh_<mode>"
     ;; Emit operand 0 = vec_select of the concatenation of operands 1 and 2.
     ;; Normally elements 0 and 2 of (op1, op2) are selected; for little endian
     ;; with big endian element order the inputs are swapped and elements 1 and
     ;; 3 are selected instead.
4107   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
4108    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
4109    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
4110   "VECTOR_MEM_VSX_P (<MODE>mode)"
4111 {
       /* Special handling for LE with -maltivec=be.  */
       const bool swap_p = !BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG;

       rtvec sel = (swap_p
                    ? gen_rtvec (2, GEN_INT (1), GEN_INT (3))
                    : gen_rtvec (2, GEN_INT (0), GEN_INT (2)));
       rtx pair = (swap_p
                   ? gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1])
                   : gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]));
       rtx merged = gen_rtx_VEC_SELECT (<MODE>mode, pair,
                                        gen_rtx_PARALLEL (VOIDmode, sel));

       emit_insn (gen_rtx_SET (operands[0], merged));
       DONE;
4130 })
4131
4132 ;; V2DF/V2DI splat
4133 ;; We separate the register splat insn from the memory splat insn to force the
4134 ;; register allocator to generate the indexed form of the SPLAT when it is
4135 ;; given an offsettable memory reference.  Otherwise, if the register and
4136 ;; memory insns were combined into a single insn, the register allocator will
4137 ;; load the value into a register, and then do a double word permute.
     ;;
     ;; The expander normalizes operand 1 before the vec_duplicate is emitted:
     ;; a MEM has its address rewritten by rs6000_address_for_fpconvert, and
     ;; anything else that is not already a REG is copied into a register, so
     ;; the result matches vsx_splat_<mode>_mem or vsx_splat_<mode>_reg.
4138 (define_expand "vsx_splat_<mode>"
4139   [(set (match_operand:VSX_D 0 "vsx_register_operand")
4140         (vec_duplicate:VSX_D
4141          (match_operand:<VS_scalar> 1 "input_operand")))]
4142   "VECTOR_MEM_VSX_P (<MODE>mode)"
4143 {
4144   rtx op1 = operands[1];
4145   if (MEM_P (op1))
4146     operands[1] = rs6000_address_for_fpconvert (op1);
4147   else if (!REG_P (op1))
       /* The forced register must be stored back into operands[1]: the
          emitted pattern is built from the operands array, so assigning only
          the local copy would leave the original non-register operand in
          place.  */
4148     operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
4149 })
4150
4151 (define_insn "vsx_splat_<mode>_reg"
     ;; Register form of the V2DF/V2DI splat.  The first alternative duplicates
     ;; operand 1 with xxpermdi; the second alternative takes operand 1 from a
     ;; "b" register and uses mtvsrdd with the same register for both inputs.
4152   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4153         (vec_duplicate:VSX_D
4154          (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4155   "VECTOR_MEM_VSX_P (<MODE>mode)"
4156   "@
4157    xxpermdi %x0,%x1,%x1,0
4158    mtvsrdd %x0,%1,%1"
4159   [(set_attr "type" "vecperm")])
4160
4161 (define_insn "vsx_splat_<VSX_D:mode>_mem"
     ;; Memory form of the V2DF/V2DI splat: operand 1 is a memory operand
     ;; (constraint "Z") and the splat is done by a single lxvdsx load.
4162   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4163         (vec_duplicate:VSX_D
4164          (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4165   "VECTOR_MEM_VSX_P (<MODE>mode)"
4166   "lxvdsx %x0,%y1"
4167   [(set_attr "type" "vecload")])
4168
4169 ;; V4SI splat support
     ;; Operand 1 is either a general register (mtvsrws) or a memory operand
     ;; (lxvwsx).  Only enabled for TARGET_P9_VECTOR.
4170 (define_insn "vsx_splat_v4si"
4171   [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4172         (vec_duplicate:V4SI
4173          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4174   "TARGET_P9_VECTOR"
4175   "@
4176    mtvsrws %x0,%1
4177    lxvwsx %x0,%y1"
4178   [(set_attr "type" "vecperm,vecload")])
4179
4180 ;; SImode is not currently allowed in vector registers.  This pattern
4181 ;; allows us to use direct move to get the value in a vector register
4182 ;; so that we can use XXSPLTW
     ;; The source is a DImode register whose truncation to SImode is
     ;; duplicated.  The first alternative splats word 1 of the source with
     ;; xxspltw; the second takes a general register and uses mtvsrws.
4183 (define_insn "vsx_splat_v4si_di"
4184   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4185         (vec_duplicate:V4SI
4186          (truncate:SI
4187           (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4188   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4189   "@
4190    xxspltw %x0,%x1,1
4191    mtvsrws %x0,%1"
4192   [(set_attr "type" "vecperm")])
4193
4194 ;; V4SF splat (ISA 3.0)
     ;; Three alternatives: memory source (lxvwsx), SF value in a vector
     ;; register (split after reload), and general register source (mtvsrws).
     ;; The split only applies when operand 1 is a VSX register; it emits a
     ;; UNSPEC_VSX_CVDPSPN conversion into the destination followed by an
     ;; UNSPEC_VSX_XXSPLTW of word 0 of the destination.
4195 (define_insn_and_split "vsx_splat_v4sf"
4196   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4197         (vec_duplicate:V4SF
4198          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4199   "TARGET_P9_VECTOR"
4200   "@
4201    lxvwsx %x0,%y1
4202    #
4203    mtvsrws %x0,%1"
4204   "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4205   [(set (match_dup 0)
4206         (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4207    (set (match_dup 0)
4208         (unspec:V4SF [(match_dup 0)
4209                       (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4210   ""
4211   [(set_attr "type" "vecload,vecperm,mftgpr")
4212    (set_attr "length" "4,8,4")])
4213
4214 ;; V4SF/V4SI splat from a vector element
     ;; Operand 2 is the element number in the RTL; when not BYTES_BIG_ENDIAN
     ;; the output code replaces it with 3 minus that number before printing.
4215 (define_insn "vsx_xxspltw_<mode>"
4216   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4217         (vec_duplicate:VSX_W
4218          (vec_select:<VS_scalar>
4219           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4220           (parallel
4221            [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4222   "VECTOR_MEM_VSX_P (<MODE>mode)"
4223 {
       /* Big endian: the RTL element number is printed unchanged.  */
       if (BYTES_BIG_ENDIAN)
         return "xxspltw %x0,%x1,%2";

       /* Little endian: mirror the word index within the four words.  */
       HOST_WIDE_INT elt = INTVAL (operands[2]);
       operands[2] = GEN_INT (3 - elt);
       return "xxspltw %x0,%x1,%2";
4228 }
4229   [(set_attr "type" "vecperm")])
4230
4231 (define_insn "vsx_xxspltw_<mode>_direct"
     ;; UNSPEC_VSX_XXSPLTW form of the word splat: operand 2 is printed as the
     ;; xxspltw immediate exactly as given, with no endian adjustment.
4232   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4233         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4234                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
4235                       UNSPEC_VSX_XXSPLTW))]
4236   "VECTOR_MEM_VSX_P (<MODE>mode)"
4237   "xxspltw %x0,%x1,%2"
4238   [(set_attr "type" "vecperm")])
4239
4240 ;; V16QI/V8HI splat support on ISA 2.07
     ;; The source is a DImode value in an Altivec register; its truncation to
     ;; the element mode is duplicated.  The element index printed in the
     ;; instruction comes from the <VSX_SPLAT_COUNT> mode attribute.
4241 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4242   [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4243         (vec_duplicate:VSX_SPLAT_I
4244          (truncate:<VS_scalar>
4245           (match_operand:DI 1 "altivec_register_operand" "v"))))]
4246   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4247   "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4248   [(set_attr "type" "vecperm")])
4249
4250 ;; V2DF/V2DI splat for use by vec_splat builtin
     ;; Operand 2 is the element number.  The xxpermdi selector is 0 for
     ;; element 0 with big endian element order or element 1 with little endian
     ;; element order, and 3 otherwise.
4251 (define_insn "vsx_xxspltd_<mode>"
4252   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4253         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4254                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
4255                       UNSPEC_VSX_XXSPLTD))]
4256   "VECTOR_MEM_VSX_P (<MODE>mode)"
4257 {
       HOST_WIDE_INT elt = INTVAL (operands[2]);
       bool use_sel0_p = VECTOR_ELT_ORDER_BIG ? elt == 0 : elt == 1;

       return (use_sel0_p
               ? "xxpermdi %x0,%x1,%x1,0"
               : "xxpermdi %x0,%x1,%x1,3");
4263 }
4264   [(set_attr "type" "vecperm")])
4265
4266 ;; V4SF/V4SI interleave
     ;; Selects elements 0, 4, 1, 5 of the concatenation of operands 1 and 2.
     ;; Big endian prints xxmrghw; little endian prints xxmrglw with the two
     ;; source operands swapped.
4267 (define_insn "vsx_xxmrghw_<mode>"
4268   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4269         (vec_select:VSX_W
4270           (vec_concat:<VS_double>
4271             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4272             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4273           (parallel [(const_int 0) (const_int 4)
4274                      (const_int 1) (const_int 5)])))]
4275   "VECTOR_MEM_VSX_P (<MODE>mode)"
4276 {
       return (BYTES_BIG_ENDIAN
               ? "xxmrghw %x0,%x1,%x2"
               : "xxmrglw %x0,%x2,%x1");
4281 }
4282   [(set_attr "type" "vecperm")])
4283
4284 (define_insn "vsx_xxmrglw_<mode>"
     ;; Selects elements 2, 6, 3, 7 of the concatenation of operands 1 and 2.
     ;; Big endian prints xxmrglw; little endian prints xxmrghw with the two
     ;; source operands swapped.
     ;; NOTE(review): operand 2's second alternative carries a "?" that the
     ;; matching vsx_xxmrghw_<mode> pattern does not have -- confirm intent.
4285   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4286         (vec_select:VSX_W
4287           (vec_concat:<VS_double>
4288             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4289             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4290           (parallel [(const_int 2) (const_int 6)
4291                      (const_int 3) (const_int 7)])))]
4292   "VECTOR_MEM_VSX_P (<MODE>mode)"
4293 {
       return (BYTES_BIG_ENDIAN
               ? "xxmrglw %x0,%x1,%x2"
               : "xxmrghw %x0,%x2,%x1");
4298 }
4299   [(set_attr "type" "vecperm")])
4300
4301 ;; Shift left double by word immediate
     ;; Operands 1 and 2 are the two source vectors and operand 3 is the
     ;; immediate, printed unchanged as the last xxsldwi operand.
4302 (define_insn "vsx_xxsldwi_<mode>"
4303   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4304         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4305                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4306                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
4307                       UNSPEC_VSX_SLDWI))]
4308   "VECTOR_MEM_VSX_P (<MODE>mode)"
4309   "xxsldwi %x0,%x1,%x2,%3"
4310   [(set_attr "type" "vecperm")])
4311
4312 \f
4313 ;; Vector reduction insns and splitters
4314
4315 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
     ;; V2DF reduction: combine operand 1 with a copy of itself whose two
     ;; elements are exchanged.  The split emits an xxsldwi by 2 words of
     ;; operand 1 with itself into a temporary, then the <VEC_reduc_rtx>
     ;; operation of the temporary and operand 1 into operand 0.
4316   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4317         (VEC_reduc:V2DF
4318          (vec_concat:V2DF
4319           (vec_select:DF
4320            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4321            (parallel [(const_int 1)]))
4322           (vec_select:DF
4323            (match_dup 1)
4324            (parallel [(const_int 0)])))
4325          (match_dup 1)))
4326    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4327   "VECTOR_UNIT_VSX_P (V2DFmode)"
4328   "#"
4329   ""
4330   [(const_int 0)]
4331   "
4332 {
       /* Use a new pseudo when operand 2 is still a SCRATCH, otherwise the
          register already assigned to it.  */
4333   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4334              ? gen_reg_rtx (V2DFmode)
4335              : operands[2];
4336   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4337   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4338   DONE;
4339 }"
4340   [(set_attr "length" "8")
4341    (set_attr "type" "veccomplex")])
4342
4343 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
     ;; V4SF reduction, emitted as four insns: xxsldwi by 2 words, the
     ;; <VEC_reduc_rtx> operation, xxsldwi by 3 words, and the
     ;; <VEC_reduc_rtx> operation again into operand 0.
4344   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4345         (VEC_reduc:V4SF
4346          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4347          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4348    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4349    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4350   "VECTOR_UNIT_VSX_P (V4SFmode)"
4351   "#"
4352   ""
4353   [(const_int 0)]
4354   "
4355 {
4356   rtx op0 = operands[0];
4357   rtx op1 = operands[1];
4358   rtx tmp2, tmp3, tmp4;
4359
       /* Use three new pseudos when they can still be created; otherwise use
          the two scratch operands, with tmp4 sharing the register of tmp2.  */
4360   if (can_create_pseudo_p ())
4361     {
4362       tmp2 = gen_reg_rtx (V4SFmode);
4363       tmp3 = gen_reg_rtx (V4SFmode);
4364       tmp4 = gen_reg_rtx (V4SFmode);
4365     }
4366   else
4367     {
4368       tmp2 = operands[2];
4369       tmp3 = operands[3];
4370       tmp4 = tmp2;
4371     }
4372
4373   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4374   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4375   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4376   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4377   DONE;
4378 }"
4379   [(set_attr "length" "16")
4380    (set_attr "type" "veccomplex")])
4381
4382 ;; Combiner patterns with the vector reduction patterns that knows we can get
4383 ;; to the top element of the V2DF array without doing an extract.
     ;; The split takes the high part of operand 1 as a DF value directly,
     ;; extracts element 1 into a temporary, and emits the scalar
     ;; <VEC_reduc_rtx> operation on the two DF values.
4384
4385 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4386   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4387         (vec_select:DF
4388          (VEC_reduc:V2DF
4389           (vec_concat:V2DF
4390            (vec_select:DF
4391             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4392             (parallel [(const_int 1)]))
4393            (vec_select:DF
4394             (match_dup 1)
4395             (parallel [(const_int 0)])))
4396           (match_dup 1))
4397          (parallel [(const_int 1)])))
4398    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4399   "VECTOR_UNIT_VSX_P (V2DFmode)"
4400   "#"
4401   ""
4402   [(const_int 0)]
4403   "
4404 {
4405   rtx hi = gen_highpart (DFmode, operands[1]);
       /* Use a new pseudo when operand 2 is still a SCRATCH, otherwise the
          register already assigned to it.  */
4406   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4407             ? gen_reg_rtx (DFmode)
4408             : operands[2];
4409
4410   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4411   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4412   DONE;
4413 }"
4414   [(set_attr "length" "8")
4415    (set_attr "type" "veccomplex")])
4416
4417 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
     ;; Element 3 of a V4SF reduction as an SF scalar.  The split emits the
     ;; same four-insn sequence as the V4SF reduction, leaving the vector
     ;; result in a temporary, then gen_vsx_xscvspdp_scalar2 to produce the SF
     ;; result in operand 0.
4418   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4419         (vec_select:SF
4420          (VEC_reduc:V4SF
4421           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4422           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4423          (parallel [(const_int 3)])))
4424    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4425    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4426    (clobber (match_scratch:V4SF 4 "=0,0"))]
4427   "VECTOR_UNIT_VSX_P (V4SFmode)"
4428   "#"
4429   ""
4430   [(const_int 0)]
4431   "
4432 {
4433   rtx op0 = operands[0];
4434   rtx op1 = operands[1];
4435   rtx tmp2, tmp3, tmp4, tmp5;
4436
       /* Use four new pseudos when they can still be created; otherwise use
          the scratch operands, with tmp4 sharing the register of tmp2.  */
4437   if (can_create_pseudo_p ())
4438     {
4439       tmp2 = gen_reg_rtx (V4SFmode);
4440       tmp3 = gen_reg_rtx (V4SFmode);
4441       tmp4 = gen_reg_rtx (V4SFmode);
4442       tmp5 = gen_reg_rtx (V4SFmode);
4443     }
4444   else
4445     {
4446       tmp2 = operands[2];
4447       tmp3 = operands[3];
4448       tmp4 = tmp2;
4449       tmp5 = operands[4];
4450     }
4451
4452   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4453   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4454   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4455   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4456   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4457   DONE;
4458 }"
4459   [(set_attr "length" "20")
4460    (set_attr "type" "veccomplex")])
4461
4462 \f
4463 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
     ;; Matches a load of a short constant into base register operand 0
     ;; followed by a VSX_M vector load from (operand 0 + operand 3), and
     ;; prints both as one li / lx<VSm>x pair.  Disabled for TARGET_P9_VECTOR.
4464 (define_peephole
4465   [(set (match_operand:P 0 "base_reg_operand" "")
4466         (match_operand:P 1 "short_cint_operand" ""))
4467    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4468         (mem:VSX_M (plus:P (match_dup 0)
4469                            (match_operand:P 3 "int_reg_operand" ""))))]
4470   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4471   "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4472   [(set_attr "length" "8")
4473    (set_attr "type" "vecload")])
4474
4475 (define_peephole
     ;; Same vector load fusion as the preceding peephole, but matching the
     ;; address written as (operand 3 + operand 0) instead of
     ;; (operand 0 + operand 3).  The printed instructions are identical.
4476   [(set (match_operand:P 0 "base_reg_operand" "")
4477         (match_operand:P 1 "short_cint_operand" ""))
4478    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4479         (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
4480                            (match_dup 0))))]
4481   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4482   "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4483   [(set_attr "length" "8")
4484    (set_attr "type" "vecload")])
4485
4486 \f
4487 ;; ISA 3.0 vector extend sign support
     ;; V16QI source extended to each VSINT_84 mode; prints vextsb2<wd>.
     ;; Both operands use the Altivec register constraint "v".
4488
4489 (define_insn "vsx_sign_extend_qi_<mode>"
4490   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4491         (unspec:VSINT_84
4492          [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4493          UNSPEC_VSX_SIGN_EXTEND))]
4494   "TARGET_P9_VECTOR"
4495   "vextsb2<wd> %0,%1"
4496   [(set_attr "type" "vecexts")])
4497
4498 (define_insn "vsx_sign_extend_hi_<mode>"
     ;; V8HI source extended to each VSINT_84 mode; prints vextsh2<wd>.
4499   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4500         (unspec:VSINT_84
4501          [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4502          UNSPEC_VSX_SIGN_EXTEND))]
4503   "TARGET_P9_VECTOR"
4504   "vextsh2<wd> %0,%1"
4505   [(set_attr "type" "vecexts")])
4506
4507 (define_insn "*vsx_sign_extend_si_v2di"
     ;; V4SI source extended to V2DI; prints vextsw2d.
4508   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4509         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4510                      UNSPEC_VSX_SIGN_EXTEND))]
4511   "TARGET_P9_VECTOR"
4512   "vextsw2d %0,%1"
4513   [(set_attr "type" "vecexts")])
4514
4515 \f
4516 ;; ISA 3.0 Binary Floating-Point Support
4517
4518 ;; VSX Scalar Extract Exponent Quad-Precision
     ;; The IEEE128 source and the DImode result are both in Altivec
     ;; registers.  Note the unspec used is UNSPEC_VSX_SXEXPDP.
4519 (define_insn "xsxexpqp_<mode>"
4520   [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4521         (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4522          UNSPEC_VSX_SXEXPDP))]
4523   "TARGET_P9_VECTOR"
4524   "xsxexpqp %0,%1"
4525   [(set_attr "type" "vecmove")])
4526
4527 ;; VSX Scalar Extract Exponent Double-Precision
     ;; The DF source is in a VSX register and the DImode result is placed in
     ;; a general register.  Requires TARGET_64BIT.
4528 (define_insn "xsxexpdp"
4529   [(set (match_operand:DI 0 "register_operand" "=r")
4530         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4531          UNSPEC_VSX_SXEXPDP))]
4532   "TARGET_P9_VECTOR && TARGET_64BIT"
4533   "xsxexpdp %0,%x1"
4534   [(set_attr "type" "integer")])
4535
4536 ;; VSX Scalar Extract Significand Quad-Precision
     ;; The IEEE128 source and the TImode result are both in Altivec
     ;; registers.
4537 (define_insn "xsxsigqp_<mode>"
4538   [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4539         (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4540          UNSPEC_VSX_SXSIG))]
4541   "TARGET_P9_VECTOR"
4542   "xsxsigqp %0,%1"
4543   [(set_attr "type" "vecmove")])
4544
4545 ;; VSX Scalar Extract Significand Double-Precision
     ;; The DF source is in a VSX register and the DImode result is placed in
     ;; a general register.  Requires TARGET_64BIT.
4546 (define_insn "xsxsigdp"
4547   [(set (match_operand:DI 0 "register_operand" "=r")
4548         (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4549          UNSPEC_VSX_SXSIG))]
4550   "TARGET_P9_VECTOR && TARGET_64BIT"
4551   "xsxsigdp %0,%x1"
4552   [(set_attr "type" "integer")])
4553
4554 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
     ;; Same instruction as xsiexpqp_<mode>, but operand 1 is an IEEE128 value
     ;; instead of a TImode value.  Operand 2 is the DImode exponent source.
4555 (define_insn "xsiexpqpf_<mode>"
4556   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4557         (unspec:IEEE128
4558          [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4559           (match_operand:DI 2 "altivec_register_operand" "v")]
4560          UNSPEC_VSX_SIEXPQP))]
4561   "TARGET_P9_VECTOR"
4562   "xsiexpqp %0,%1,%2"
4563   [(set_attr "type" "vecmove")])
4564
4565 ;; VSX Scalar Insert Exponent Quad-Precision
     ;; Operand 1 is a TImode value and operand 2 a DImode value, both in
     ;; Altivec registers; the result has mode IEEE128.
4566 (define_insn "xsiexpqp_<mode>"
4567   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4568         (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4569                          (match_operand:DI 2 "altivec_register_operand" "v")]
4570          UNSPEC_VSX_SIEXPQP))]
4571   "TARGET_P9_VECTOR"
4572   "xsiexpqp %0,%1,%2"
4573   [(set_attr "type" "vecmove")])
4574
4575 ;; VSX Scalar Insert Exponent Double-Precision
     ;; Both inputs are DImode values in general registers; the DF result is
     ;; placed in a VSX register.  Requires TARGET_64BIT.
4576 (define_insn "xsiexpdp"
4577   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4578         (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4579                     (match_operand:DI 2 "register_operand" "r")]
4580          UNSPEC_VSX_SIEXPDP))]
4581   "TARGET_P9_VECTOR && TARGET_64BIT"
4582   "xsiexpdp %x0,%1,%2"
4583   [(set_attr "type" "fpsimple")])
4584
4585 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
     ;; Same instruction as xsiexpdp, but operand 1 has mode DF.  It is still
     ;; constrained to a general register ("r").
4586 (define_insn "xsiexpdpf"
4587   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4588         (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4589                     (match_operand:DI 2 "register_operand" "r")]
4590          UNSPEC_VSX_SIEXPDP))]
4591   "TARGET_P9_VECTOR && TARGET_64BIT"
4592   "xsiexpdp %x0,%1,%2"
4593   [(set_attr "type" "fpsimple")])
4594
4595 ;; VSX Scalar Compare Exponents Double-Precision
     ;; One expander per CMP_TEST code (eq, lt, gt, unordered).  It emits the
     ;; UNSPEC_VSX_SCMPEXPDP compare of operands 1 and 2 into a new CCFP pseudo
     ;; (operand 3), then sets SImode operand 0 from the <code> test of that
     ;; condition register against zero.
4596 (define_expand "xscmpexpdp_<code>"
4597   [(set (match_dup 3)
4598         (compare:CCFP
4599          (unspec:DF
4600           [(match_operand:DF 1 "vsx_register_operand" "wa")
4601            (match_operand:DF 2 "vsx_register_operand" "wa")]
4602           UNSPEC_VSX_SCMPEXPDP)
4603          (const_int 0)))
4604    (set (match_operand:SI 0 "register_operand" "=r")
4605         (CMP_TEST:SI (match_dup 3)
4606                      (const_int 0)))]
4607   "TARGET_P9_VECTOR"
4608 {
       /* Operand 3 is a new CCFP pseudo that receives the compare result.  */
4609   operands[3] = gen_reg_rtx (CCFPmode);
4610 })
4611
4612 (define_insn "*xscmpexpdp"
     ;; The compare insn produced by the xscmpexpdp_<code> expanders: sets a
     ;; CCFP condition register from the UNSPEC_VSX_SCMPEXPDP of two DF values
     ;; in VSX registers.  Operand 3 must be a zero constant.
4613   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4614         (compare:CCFP
4615          (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4616                      (match_operand:DF 2 "vsx_register_operand" "wa")]
4617           UNSPEC_VSX_SCMPEXPDP)
4618          (match_operand:SI 3 "zero_constant" "j")))]
4619   "TARGET_P9_VECTOR"
4620   "xscmpexpdp %0,%x1,%x2"
4621   [(set_attr "type" "fpcompare")])
4622
4623 ;; VSX Scalar Test Data Class Quad-Precision
4624 ;;  (Expansion for scalar_test_data_class (__ieee128, int))
4625 ;;   (Has side effect of setting the lt bit if operand 1 is negative,
4626 ;;    setting the eq bit if any of the conditions tested by operand 2
4627 ;;    are satisfied, and clearing the gt and unordered bits to zero.)
     ;; The expander emits the UNSPEC_VSX_STSTDC compare into a new CCFP pseudo
     ;; (operand 3) and then sets SImode operand 0 from the eq test of it.
4628 (define_expand "xststdcqp_<mode>"
4629   [(set (match_dup 3)
4630         (compare:CCFP
4631          (unspec:IEEE128
4632           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4633            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4634           UNSPEC_VSX_STSTDC)
4635          (const_int 0)))
4636    (set (match_operand:SI 0 "register_operand" "=r")
4637         (eq:SI (match_dup 3)
4638                (const_int 0)))]
4639   "TARGET_P9_VECTOR"
4640 {
       /* Operand 3 is a new CCFP pseudo that receives the test result.  */
4641   operands[3] = gen_reg_rtx (CCFPmode);
4642 })
4643
4644 ;; VSX Scalar Test Data Class Double- and Single-Precision
4645 ;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4646 ;;   if any of the conditions tested by operand 2 are satisfied.
4647 ;;   The gt and unordered bits are cleared to zero.)
     ;; The expander emits the UNSPEC_VSX_STSTDC compare into a new CCFP pseudo
     ;; (operand 3), comparing against an SImode zero (operand 4), and then
     ;; sets SImode operand 0 from the eq test of operand 3.
4648 (define_expand "xststdc<Fvsx>"
4649   [(set (match_dup 3)
4650         (compare:CCFP
4651          (unspec:SFDF
4652           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4653            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4654           UNSPEC_VSX_STSTDC)
4655          (match_dup 4)))
4656    (set (match_operand:SI 0 "register_operand" "=r")
4657         (eq:SI (match_dup 3)
4658                (const_int 0)))]
4659   "TARGET_P9_VECTOR"
4660 {
       /* Operand 3 is a new CCFP pseudo; operand 4 is the SImode zero that
          the compare is made against.  */
4661   operands[3] = gen_reg_rtx (CCFPmode);
4662   operands[4] = CONST0_RTX (SImode);
4663 })
4664
4665 ;; The VSX Scalar Test Negative Quad-Precision
     ;; Uses the UNSPEC_VSX_STSTDC compare with a constant 0 in place of the
     ;; data class mask, writes it to a new CCFP pseudo (operand 2), and sets
     ;; SImode operand 0 from the lt test of that condition register.
4666 (define_expand "xststdcnegqp_<mode>"
4667   [(set (match_dup 2)
4668         (compare:CCFP
4669          (unspec:IEEE128
4670           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4671            (const_int 0)]
4672           UNSPEC_VSX_STSTDC)
4673          (const_int 0)))
4674    (set (match_operand:SI 0 "register_operand" "=r")
4675         (lt:SI (match_dup 2)
4676                (const_int 0)))]
4677   "TARGET_P9_VECTOR"
4678 {
       /* Operand 2 is a new CCFP pseudo that receives the test result.  */
4679   operands[2] = gen_reg_rtx (CCFPmode);
4680 })
4681
4682 ;; The VSX Scalar Test Negative Double- and Single-Precision
     ;; Uses the UNSPEC_VSX_STSTDC compare with a constant 0 in place of the
     ;; data class mask, writes it to a new CCFP pseudo (operand 2) compared
     ;; against an SImode zero (operand 3), and sets SImode operand 0 from the
     ;; lt test of operand 2.
4683 (define_expand "xststdcneg<Fvsx>"
4684   [(set (match_dup 2)
4685         (compare:CCFP
4686          (unspec:SFDF
4687           [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4688            (const_int 0)]
4689           UNSPEC_VSX_STSTDC)
4690          (match_dup 3)))
4691    (set (match_operand:SI 0 "register_operand" "=r")
4692         (lt:SI (match_dup 2)
4693                (const_int 0)))]
4694   "TARGET_P9_VECTOR"
4695 {
       /* Operand 2 is a new CCFP pseudo; operand 3 is the SImode zero that
          the compare is made against.  */
4696   operands[2] = gen_reg_rtx (CCFPmode);
4697   operands[3] = CONST0_RTX (SImode);
4698 })
4699
4700 (define_insn "*xststdcqp_<mode>"
     ;; The compare insn produced by the quad-precision test data class
     ;; expanders: sets a CCFP condition register from UNSPEC_VSX_STSTDC of an
     ;; IEEE128 value in an Altivec register and a 7-bit constant mask.
     ;; Operand 0 uses the cc_reg_operand predicate, as *xscmpexpdp does,
     ;; instead of an empty predicate that accepts any CCFP rtx.
4701   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4702         (compare:CCFP
4703          (unspec:IEEE128
4704           [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4705            (match_operand:SI 2 "u7bit_cint_operand" "n")]
4706           UNSPEC_VSX_STSTDC)
4707          (const_int 0)))]
4708   "TARGET_P9_VECTOR"
4709   "xststdcqp %0,%1,%2"
4710   [(set_attr "type" "fpcompare")])
4711
4712 (define_insn "*xststdc<Fvsx>"
     ;; The compare insn produced by the single/double-precision test data
     ;; class expanders: sets a CCFP condition register from UNSPEC_VSX_STSTDC
     ;; of an SFDF value in a VSX register and a 7-bit constant mask, compared
     ;; against a zero constant (operand 3).
     ;; Operand 0 uses the cc_reg_operand predicate, as *xscmpexpdp does,
     ;; instead of an empty predicate that accepts any CCFP rtx.
4713   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4714         (compare:CCFP
4715          (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4716                        (match_operand:SI 2 "u7bit_cint_operand" "n")]
4717           UNSPEC_VSX_STSTDC)
4718          (match_operand:SI 3 "zero_constant" "j")))]
4719   "TARGET_P9_VECTOR"
4720   "xststdc<Fvsx> %0,%x1,%2"
4721   [(set_attr "type" "fpcompare")])
4722
4723 ;; VSX Vector Extract Exponent Double and Single Precision
;; Operand 0 (result) and operand 1 (source) are VSX_F vectors held in VSX
;; registers.  The operation is modelled as the opaque unspec
;; UNSPEC_VSX_VXEXP.
4724 (define_insn "xvxexp<VSs>"
4725   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4726         (unspec:VSX_F
4727          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4728          UNSPEC_VSX_VXEXP))]
4729   "TARGET_P9_VECTOR"
4730   "xvxexp<VSs> %x0,%x1"
4731   [(set_attr "type" "vecsimple")])
4732
4733 ;; VSX Vector Extract Significand Double and Single Precision
;; Operand 0 (result) and operand 1 (source) are VSX_F vectors held in VSX
;; registers.  The operation is modelled as the opaque unspec
;; UNSPEC_VSX_VXSIG.
4734 (define_insn "xvxsig<VSs>"
4735   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4736         (unspec:VSX_F
4737          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4738          UNSPEC_VSX_VXSIG))]
4739   "TARGET_P9_VECTOR"
4740   "xvxsig<VSs> %x0,%x1"
4741   [(set_attr "type" "vecsimple")])
4742
4743 ;; VSX Vector Insert Exponent Double and Single Precision
;; Operand 0 is the VSX_F result; operands 1 and 2 are the two VSX_F source
;; vectors, passed to the instruction in that order.
;; NOTE(review): which source supplies the exponent is not visible here;
;; confirm against the ISA description of xviexp.
4744 (define_insn "xviexp<VSs>"
4745   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4746         (unspec:VSX_F
4747          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4748           (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4749          UNSPEC_VSX_VIEXP))]
4750   "TARGET_P9_VECTOR"
4751   "xviexp<VSs> %x0,%x1,%x2"
4752   [(set_attr "type" "vecsimple")])
4753
4754 ;; VSX Vector Test Data Class Double and Single Precision
4755 ;; The corresponding elements of the result vector are all ones
4756 ;; if any of the conditions tested by operand 2 are satisfied.
;; Operand 0 is the <VSI> result vector, operand 1 the VSX_F input vector,
;; and operand 2 a constant accepted by u7bit_cint_operand.
4757 (define_insn "xvtstdc<VSs>"
4758   [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4759         (unspec:<VSI>
4760          [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4761           (match_operand:SI 2 "u7bit_cint_operand" "n")]
4762          UNSPEC_VSX_VTSTDC))]
4763   "TARGET_P9_VECTOR"
4764   "xvtstdc<VSs> %x0,%x1,%2"
4765   [(set_attr "type" "vecsimple")])
4766
4767 ;; ISA 3.0 String Operations Support
4768
4769 ;; Compare vectors producing a vector result and a predicate, setting CR6
4770 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4771 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
4772 ;; need to match v4sf, v2df, or v2di modes because those are expanded
4773 ;; to use Power8 instructions.
;; The pattern is a parallel of two sets: CR6 receives the UNSPEC_PREDICATE
;; of the NE comparison of operands 1 and 2, and operand 0 receives the
;; element-wise NE vector.  Both come from the single dotted instruction.
4774 (define_insn "*vsx_ne_<mode>_p"
4775   [(set (reg:CC CR6_REGNO)
4776         (unspec:CC
4777          [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4778                  (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4779          UNSPEC_PREDICATE))
4780    (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4781         (ne:VSX_EXTRACT_I (match_dup 1)
4782                           (match_dup 2)))]
4783   "TARGET_P9_VECTOR"
4784   "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4785   [(set_attr "type" "vecsimple")])
4786
;; Predicate form of the "not equal or zero" compare.  CR6 receives the
;; UNSPEC_PREDICATE of the UNSPEC_NEZ_P of operands 1 and 2, and operand 0
;; receives the same UNSPEC_NEZ_P vector result.  Emits the dotted
;; vcmpnez instruction.
4787 (define_insn "*vector_nez_<mode>_p"
4788   [(set (reg:CC CR6_REGNO)
4789         (unspec:CC [(unspec:VI
4790                      [(match_operand:VI 1 "gpc_reg_operand" "v")
4791                       (match_operand:VI 2 "gpc_reg_operand" "v")]
4792                      UNSPEC_NEZ_P)]
4793          UNSPEC_PREDICATE))
4794    (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4795         (unspec:VI [(match_dup 1)
4796                     (match_dup 2)]
4797          UNSPEC_NEZ_P))]
4798   "TARGET_P9_VECTOR"
4799   "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4800   [(set_attr "type" "vecsimple")])
4801
4802 ;; Return first position of match between vectors
;; Operand 0 is the SImode result; operands 1 and 2 are the vectors compared.
;; The expansion emits vcmpnez on the two inputs, complements the result,
;; and applies vctzlsbb to the complemented vector.  For V16QI that count is
;; the result.  For the wider element modes it is shifted right by SH, which
;; is half the element size in bytes (1 for 2-byte elements, 2 for 4-byte
;; elements), i.e. the count is divided by the element size.
4803 (define_expand "first_match_index_<mode>"
4804   [(match_operand:SI 0 "register_operand")
4805    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4806                (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4807   UNSPEC_VSX_FIRST_MATCH_INDEX)]
4808   "TARGET_P9_VECTOR"
4809 {
4810   int sh;
4811
4812   rtx cmp_result = gen_reg_rtx (<MODE>mode);
4813   rtx not_result = gen_reg_rtx (<MODE>mode);
4814
4815   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4816                                              operands[2]));
4817   emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4818
4819   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4820
4821   if (<MODE>mode == V16QImode)
4822     emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4823   else
4824     {
4825       rtx tmp = gen_reg_rtx (SImode);
4826       emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4827       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4828     }
4829   DONE;
4830 })
4831
4832 ;; Return first position of match between vectors or end of string (EOS)
;; Operand 0 is the SImode result; operands 1 and 2 are the vectors compared.
;; The expansion ANDs the "operand != 0" masks of both inputs, NANDs that
;; with the vcmpnez result of the two inputs, and applies vctzlsbb to the
;; NAND result.  For element modes other than V16QI the count is shifted
;; right by half the element size in bytes.
4833 (define_expand "first_match_or_eos_index_<mode>"
4834   [(match_operand:SI 0 "register_operand")
4835    (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4836    (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4837   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4838   "TARGET_P9_VECTOR"
4839 {
4840   int sh;
4841   rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4842   rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4843   rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4844   rtx and_result = gen_reg_rtx (<MODE>mode);
4845   rtx result = gen_reg_rtx (<MODE>mode);
4846   rtx vzero = gen_reg_rtx (<MODE>mode);
4847
4848   /* Vector with zeros in elements that correspond to zeros in operands.  */
4849   emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4850   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4851   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4852   emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4853
4854   /* Vector with ones in elements that do not match.  */
4855   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4856                                              operands[2]));
4857
4858   /* Create vector with ones in elements where there was a zero in one of
4859      the source elements or the elements that match.  */
4860   emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4861   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4862
4863   if (<MODE>mode == V16QImode)
4864     emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4865   else
4866     {
4867       rtx tmp = gen_reg_rtx (SImode);
4868       emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4869       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4870     }
4871   DONE;
4872 })
4873
4874 ;; Return first position of mismatch between vectors
;; Operand 0 is the SImode result; operands 1 and 2 are the vectors compared.
;; The expansion emits vcmpne on the two inputs and applies vctzlsbb to the
;; compare result.  For element modes other than V16QI the count is shifted
;; right by half the element size in bytes.
4875 (define_expand "first_mismatch_index_<mode>"
4876   [(match_operand:SI 0 "register_operand")
4877    (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4878    (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4879   UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4880   "TARGET_P9_VECTOR"
4881 {
4882   int sh;
4883   rtx cmp_result = gen_reg_rtx (<MODE>mode);
4884
4885   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4886                                             operands[2]));
4887   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4888
4889   if (<MODE>mode == V16QImode)
4890     emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4891   else
4892     {
4893       rtx tmp = gen_reg_rtx (SImode);
4894       emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4895       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4896     }
4897   DONE;
4898 })
4899
4900 ;; Return first position of mismatch between vectors or end of string (EOS)
;; Operand 0 is the SImode result; operands 1 and 2 are the vectors compared.
;; The expansion ANDs the "operand != 0" masks of both inputs, NANDs that
;; with the complement of the vcmpnez result, and applies vctzlsbb to the
;; NAND result.  For element modes other than V16QI the count is shifted
;; right by half the element size in bytes.
4901 (define_expand "first_mismatch_or_eos_index_<mode>"
4902   [(match_operand:SI 0 "register_operand")
4903    (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4904    (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4905   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4906   "TARGET_P9_VECTOR"
4907 {
4908   int sh;
4909   rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4910   rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4911   rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4912   rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4913   rtx and_result = gen_reg_rtx (<MODE>mode);
4914   rtx result = gen_reg_rtx (<MODE>mode);
4915   rtx vzero = gen_reg_rtx (<MODE>mode);
4916
4917   /* Vector with zeros in elements that correspond to zeros in operands.  */
4918   emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4919
4920   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4921   emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4922   emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4923
4924   /* Vector with ones in elements that match.  */
4925   emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4926                                              operands[2]));
4927   emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4928
4929   /* Create vector with ones in elements where there was a zero in one of
4930      the source elements or the elements did not match.  */
4931   emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4932   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4933
4934   if (<MODE>mode == V16QImode)
4935     emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4936   else
4937     {
4938       rtx tmp = gen_reg_rtx (SImode);
4939       emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4940       emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4941     }
4942   DONE;
4943 })
4944
4945 ;; Load VSX Vector with Length
;; Operand 0 is the V16QI result, operand 1 the DImode address register and
;; operand 2 the DImode length.  The preparation code creates a DImode
;; pseudo as operand 3, which is first set to operand 2 shifted left by 56
;; and then used as the length argument of the UNSPEC_LXVL load.
4946 (define_expand "lxvl"
4947   [(set (match_dup 3)
4948         (ashift:DI (match_operand:DI 2 "register_operand")
4949                    (const_int 56)))
4950    (set (match_operand:V16QI 0 "vsx_register_operand")
4951         (unspec:V16QI
4952          [(match_operand:DI 1 "gpc_reg_operand")
4953           (mem:V16QI (match_dup 1))
4954           (match_dup 3)]
4955          UNSPEC_LXVL))]
4956   "TARGET_P9_VECTOR && TARGET_64BIT"
4957 {
4958   operands[3] = gen_reg_rtx (DImode);
4959 })
4960
;; Matches the load set produced by the lxvl expander and emits lxvl.
;; Operand 0 is the V16QI destination, operand 1 the base address register
;; (constraint "b") and operand 2 the already-shifted DImode length.  The
;; (mem ...) element records the memory read at operand 1.
4961 (define_insn "*lxvl"
4962   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4963         (unspec:V16QI
4964          [(match_operand:DI 1 "gpc_reg_operand" "b")
4965           (mem:V16QI (match_dup 1))
4966           (match_operand:DI 2 "register_operand" "r")]
4967          UNSPEC_LXVL))]
4968   "TARGET_P9_VECTOR && TARGET_64BIT"
4969   "lxvl %x0,%1,%2"
4970   [(set_attr "type" "vecload")])
4971
;; Emits lxvll.  Operand 0 is the V16QI destination, operand 1 the base
;; address register and operand 2 the DImode length register, passed to the
;; instruction unchanged (callers do any shifting themselves).
;; NOTE(review): unlike *lxvl this is not conditioned on TARGET_64BIT even
;; though its operands are DImode -- confirm that is intended.
4972 (define_insn "lxvll"
4973   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4974         (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4975                        (mem:V16QI (match_dup 1))
4976                        (match_operand:DI 2 "register_operand" "r")]
4977                       UNSPEC_LXVLL))]
4978   "TARGET_P9_VECTOR"
4979   "lxvll %x0,%1,%2"
4980   [(set_attr "type" "vecload")])
4981
4982 ;; Expand for builtin xl_len_r
;; Operand 0 is the V16QI result, operand 1 the DImode address and operand 2
;; the DImode length.  The expansion builds a permute mask from operand 2
;; with lvsl, shifts operand 2 left by 56 into a temporary, loads with lxvll
;; using that temporary, and permutes the loaded vector with itself by the
;; mask into operand 0.
;; NOTE(review): the insn condition is empty although the lxvll pattern it
;; emits requires TARGET_P9_VECTOR; presumably the builtin is gated
;; elsewhere -- confirm.
4983 (define_expand "xl_len_r"
4984   [(match_operand:V16QI 0 "vsx_register_operand")
4985    (match_operand:DI 1 "register_operand")
4986    (match_operand:DI 2 "register_operand")]
4987   ""
4988 {
4989   rtx shift_mask = gen_reg_rtx (V16QImode);
4990   rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4991   rtx tmp = gen_reg_rtx (DImode);
4992
4993   emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4994   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4995   emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4996   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4997              shift_mask));
4998   DONE;
4999 })
5000
;; Emits stxvll.  Operand 0 is the V16QI value to store, operand 1 the base
;; address register and operand 2 the DImode length register, passed to the
;; instruction unchanged.  The (mem ...) inside the unspec refers to the
;; same location that is being set.
5001 (define_insn "stxvll"
5002   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5003         (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5004                        (mem:V16QI (match_dup 1))
5005                        (match_operand:DI 2 "register_operand" "r")]
5006                       UNSPEC_STXVLL))]
5007   "TARGET_P9_VECTOR"
5008   "stxvll %x0,%1,%2"
5009   [(set_attr "type" "vecstore")])
5010
5011 ;; Store VSX Vector with Length
;; Operand 0 is the V16QI value to store, operand 1 the DImode address
;; register and operand 2 the DImode length.  The preparation code creates a
;; DImode pseudo as operand 3, which is first set to operand 2 shifted left
;; by 56 and then used as the length argument of the UNSPEC_STXVL store.
5012 (define_expand "stxvl"
5013   [(set (match_dup 3)
5014         (ashift:DI (match_operand:DI 2 "register_operand")
5015                    (const_int 56)))
5016    (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5017         (unspec:V16QI
5018          [(match_operand:V16QI 0 "vsx_register_operand")
5019           (mem:V16QI (match_dup 1))
5020           (match_dup 3)]
5021          UNSPEC_STXVL))]
5022   "TARGET_P9_VECTOR && TARGET_64BIT"
5023 {
5024   operands[3] = gen_reg_rtx (DImode);
5025 })
5026
;; Matches the store set produced by the stxvl expander and emits stxvl.
;; Operand 0 is the V16QI value, operand 1 the base address register
;; (constraint "b") and operand 2 the already-shifted DImode length.
5027 (define_insn "*stxvl"
5028   [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5029         (unspec:V16QI
5030          [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5031           (mem:V16QI (match_dup 1))
5032           (match_operand:DI 2 "register_operand" "r")]
5033          UNSPEC_STXVL))]
5034   "TARGET_P9_VECTOR && TARGET_64BIT"
5035   "stxvl %x0,%1,%2"
5036   [(set_attr "type" "vecstore")])
5037
5038 ;; Expand for builtin xst_len_r
;; Operand 0 is the V16QI value to store, operand 1 the DImode address and
;; operand 2 the DImode length.  The expansion builds a permute mask from
;; operand 2 with lvsr, permutes operand 0 with itself by that mask, shifts
;; operand 2 left by 56 into a temporary, and stores the permuted vector
;; with stxvll using that temporary as the length.
;; The insn condition mirrors the stxvll pattern emitted last.  It used to
;; be the unspec name UNSPEC_XST_LEN_R, which is an enumerator rather than a
;; target test.  All operands are inputs; like xl_len_r, the expander
;; carries no constraint strings.
5039 (define_expand "xst_len_r"
5040   [(match_operand:V16QI 0 "vsx_register_operand")
5041    (match_operand:DI 1 "register_operand")
5042    (match_operand:DI 2 "register_operand")]
5043   "TARGET_P9_VECTOR"
5044 {
5045   rtx shift_mask = gen_reg_rtx (V16QImode);
5046   rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5047   rtx tmp = gen_reg_rtx (DImode);
5048
5049   emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5050   emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5051              shift_mask));
5052   emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5053   emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5054   DONE;
5055 })
5056
5057 ;; Vector Compare Not Equal Byte (specified/not+eq:)
;; Operand 0 receives the NOT of the element-wise (operand 1 == operand 2);
;; all three operands are V16QI Altivec registers.
5058 (define_insn "vcmpneb"
5059   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5060          (not:V16QI
5061            (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5062                      (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5063   "TARGET_P9_VECTOR"
5064   "vcmpneb %0,%1,%2"
5065   [(set_attr "type" "vecsimple")])
5066
5067 ;; Vector Compare Not Equal or Zero Byte
;; Operands 0 (result), 1 and 2 (sources) are V16QI Altivec registers.  The
;; operation is modelled as the opaque unspec UNSPEC_VCMPNEZB rather than
;; as an RTL comparison.
5068 (define_insn "vcmpnezb"
5069   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5070         (unspec:V16QI
5071          [(match_operand:V16QI 1 "altivec_register_operand" "v")
5072           (match_operand:V16QI 2 "altivec_register_operand" "v")]
5073          UNSPEC_VCMPNEZB))]
5074   "TARGET_P9_VECTOR"
5075   "vcmpnezb %0,%1,%2"
5076   [(set_attr "type" "vecsimple")])
5077
5078 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
;; Operand 0 receives the NOT of the element-wise (operand 1 == operand 2);
;; all three operands are V8HI Altivec registers.
5079 (define_insn "vcmpneh"
5080   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5081         (not:V8HI
5082           (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5083                    (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5084   "TARGET_P9_VECTOR"
5085   "vcmpneh %0,%1,%2"
5086   [(set_attr "type" "vecsimple")])
5087
5088 ;; Vector Compare Not Equal or Zero Half Word
;; Operands 0 (result), 1 and 2 (sources) are V8HI Altivec registers.  The
;; operation is modelled as the opaque unspec UNSPEC_VCMPNEZH.
5089 (define_insn "vcmpnezh"
5090   [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5091         (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5092                       (match_operand:V8HI 2 "altivec_register_operand" "v")]
5093          UNSPEC_VCMPNEZH))]
5094   "TARGET_P9_VECTOR"
5095   "vcmpnezh %0,%1,%2"
5096   [(set_attr "type" "vecsimple")])
5097
5098 ;; Vector Compare Not Equal Word (specified/not+eq:)
;; Operand 0 receives the NOT of the element-wise (operand 1 == operand 2);
;; all three operands are V4SI Altivec registers.
5099 (define_insn "vcmpnew"
5100   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5101         (not:V4SI
5102           (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5103                    (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5104   "TARGET_P9_VECTOR"
5105   "vcmpnew %0,%1,%2"
5106   [(set_attr "type" "vecsimple")])
5107
5108 ;; Vector Compare Not Equal or Zero Word
;; Operands 0 (result), 1 and 2 (sources) are V4SI Altivec registers.  The
;; operation is modelled as the opaque unspec UNSPEC_VCMPNEZW.
5109 (define_insn "vcmpnezw"
5110   [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5111         (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5112                       (match_operand:V4SI 2 "altivec_register_operand" "v")]
5113          UNSPEC_VCMPNEZW))]
5114   "TARGET_P9_VECTOR"
5115   "vcmpnezw %0,%1,%2"
5116   [(set_attr "type" "vecsimple")])
5117
5118 ;; Vector Count Leading Zero Least-Significant Bits Byte
;; Operand 0 is the SImode GPR result and operand 1 the V16QI source in an
;; Altivec register.  Only a V16QI form is provided.
5119 (define_insn "vclzlsbb"
5120   [(set (match_operand:SI 0 "register_operand" "=r")
5121         (unspec:SI
5122          [(match_operand:V16QI 1 "altivec_register_operand" "v")]
5123          UNSPEC_VCLZLSBB))]
5124   "TARGET_P9_VECTOR"
5125   "vclzlsbb %0,%1"
5126   [(set_attr "type" "vecsimple")])
5127
5128 ;; Vector Count Trailing Zero Least-Significant Bits Byte
;; Operand 0 is the SImode GPR result and operand 1 the source vector in an
;; Altivec register.  The pattern is instantiated for every VSX_EXTRACT_I
;; mode, but the same vctzlsbb instruction is emitted for each; the
;; first_*_index expanders above rescale the result for wider elements.
5129 (define_insn "vctzlsbb_<mode>"
5130   [(set (match_operand:SI 0 "register_operand" "=r")
5131         (unspec:SI
5132          [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5133          UNSPEC_VCTZLSBB))]
5134   "TARGET_P9_VECTOR"
5135   "vctzlsbb %0,%1"
5136   [(set_attr "type" "vecsimple")])
5137
5138 ;; Vector Extract Unsigned Byte Left-Indexed
;; Operand 0 is the SImode GPR result, operand 1 an SImode GPR (presumably
;; the index -- confirm against the ISA) and operand 2 the V16QI source.
5139 (define_insn "vextublx"
5140   [(set (match_operand:SI 0 "register_operand" "=r")
5141         (unspec:SI
5142          [(match_operand:SI 1 "register_operand" "r")
5143           (match_operand:V16QI 2 "altivec_register_operand" "v")]
5144          UNSPEC_VEXTUBLX))]
5145   "TARGET_P9_VECTOR"
5146   "vextublx %0,%1,%2"
5147   [(set_attr "type" "vecsimple")])
5148
5149 ;; Vector Extract Unsigned Byte Right-Indexed
;; Operand 0 is the SImode GPR result, operand 1 an SImode GPR (presumably
;; the index -- confirm against the ISA) and operand 2 the V16QI source.
5150 (define_insn "vextubrx"
5151   [(set (match_operand:SI 0 "register_operand" "=r")
5152         (unspec:SI
5153          [(match_operand:SI 1 "register_operand" "r")
5154           (match_operand:V16QI 2 "altivec_register_operand" "v")]
5155          UNSPEC_VEXTUBRX))]
5156   "TARGET_P9_VECTOR"
5157   "vextubrx %0,%1,%2"
5158   [(set_attr "type" "vecsimple")])
5159
5160 ;; Vector Extract Unsigned Half Word Left-Indexed
;; Operand 0 is the SImode GPR result, operand 1 an SImode GPR (presumably
;; the index -- confirm against the ISA) and operand 2 the V8HI source.
5161 (define_insn "vextuhlx"
5162   [(set (match_operand:SI 0 "register_operand" "=r")
5163         (unspec:SI
5164          [(match_operand:SI 1 "register_operand" "r")
5165           (match_operand:V8HI 2 "altivec_register_operand" "v")]
5166          UNSPEC_VEXTUHLX))]
5167   "TARGET_P9_VECTOR"
5168   "vextuhlx %0,%1,%2"
5169   [(set_attr "type" "vecsimple")])
5170
5171 ;; Vector Extract Unsigned Half Word Right-Indexed
;; Operand 0 is the SImode GPR result, operand 1 an SImode GPR (presumably
;; the index -- confirm against the ISA) and operand 2 the V8HI source.
5172 (define_insn "vextuhrx"
5173   [(set (match_operand:SI 0 "register_operand" "=r")
5174         (unspec:SI
5175          [(match_operand:SI 1 "register_operand" "r")
5176           (match_operand:V8HI 2 "altivec_register_operand" "v")]
5177          UNSPEC_VEXTUHRX))]
5178   "TARGET_P9_VECTOR"
5179   "vextuhrx %0,%1,%2"
5180   [(set_attr "type" "vecsimple")])
5181
5182 ;; Vector Extract Unsigned Word Left-Indexed
;; Operand 0 is the SImode GPR result, operand 1 an SImode GPR and operand 2
;; the V4SI source.  The *vextract4b_internal split passes a byte offset in
;; operand 1 when VECTOR_ELT_ORDER_BIG is true.
5183 (define_insn "vextuwlx"
5184   [(set (match_operand:SI 0 "register_operand" "=r")
5185         (unspec:SI
5186          [(match_operand:SI 1 "register_operand" "r")
5187           (match_operand:V4SI 2 "altivec_register_operand" "v")]
5188          UNSPEC_VEXTUWLX))]
5189   "TARGET_P9_VECTOR"
5190   "vextuwlx %0,%1,%2"
5191   [(set_attr "type" "vecsimple")])
5192
5193 ;; Vector Extract Unsigned Word Right-Indexed
;; Operand 0 is the SImode GPR result, operand 1 an SImode GPR and operand 2
;; the V4SI source.  The *vextract4b_internal split passes a byte offset in
;; operand 1 when VECTOR_ELT_ORDER_BIG is false.
5194 (define_insn "vextuwrx"
5195   [(set (match_operand:SI 0 "register_operand" "=r")
5196         (unspec:SI
5197          [(match_operand:SI 1 "register_operand" "r")
5198           (match_operand:V4SI 2 "altivec_register_operand" "v")]
5199          UNSPEC_VEXTUWRX))]
5200   "TARGET_P9_VECTOR"
5201   "vextuwrx %0,%1,%2"
5202   [(set_attr "type" "vecsimple")])
5203
5204 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
5205 ;; endian version needs to adjust the byte number, and the V4SI element in
5206 ;; vinsert4b.
;; Operand 0 is the DImode result, operand 1 the V16QI source and operand 2
;; a constant byte offset in the range 0..12.  When VECTOR_ELT_ORDER_BIG is
;; false the offset is replaced by 12 minus the offset before the pattern
;; is emitted.
5207 (define_expand "vextract4b"
5208   [(set (match_operand:DI 0 "gpc_reg_operand")
5209         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
5210                     (match_operand:QI 2 "const_0_to_12_operand")]
5211                    UNSPEC_XXEXTRACTUW))]
5212   "TARGET_P9_VECTOR"
5213 {
5214   if (!VECTOR_ELT_ORDER_BIG)
5215     operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5216 })
5217
;; Two alternatives.  With a "wj" destination the insn is a single
;; xxextractuw.  With a GPR destination the template is "#", and after
;; reload the insn is split: the constant offset (operand 2) is moved into
;; operand 0, then vextuwlx or vextuwrx (chosen by VECTOR_ELT_ORDER_BIG)
;; extracts the word, using the SImode view of operand 0 as both index and
;; destination and the V4SImode view of operand 1 as the source.
5218 (define_insn_and_split "*vextract4b_internal"
5219   [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r")
5220         (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v")
5221                     (match_operand:QI 2 "const_0_to_12_operand" "n,n")]
5222                    UNSPEC_XXEXTRACTUW))]
5223   "TARGET_P9_VECTOR"
5224   "@
5225    xxextractuw %x0,%x1,%2
5226    #"
5227   "&& reload_completed && int_reg_operand (operands[0], DImode)"
5228   [(const_int 0)]
5229 {
5230   rtx op0 = operands[0];
5231   rtx op1 = operands[1];
5232   rtx op2 = operands[2];
5233   rtx op0_si = gen_rtx_REG (SImode, REGNO (op0));
5234   rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1));
5235
5236   emit_move_insn (op0, op2);
5237   if (VECTOR_ELT_ORDER_BIG)
5238     emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si));
5239   else
5240     emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si));
5241   DONE;
5242 }
5243   [(set_attr "type" "vecperm")])
5244
;; Operand 0 is the V16QI result, operand 1 the V4SI vector supplying the
;; word, operand 2 the V16QI vector inserted into, and operand 3 a constant
;; byte offset in the range 0..12.  When VECTOR_ELT_ORDER_BIG is false,
;; operand 1 is first passed through xxpermdi (with itself, selector
;; const1_rtx) into a new pseudo, and the offset becomes 12 minus itself.
5245 (define_expand "vinsert4b"
5246   [(set (match_operand:V16QI 0 "vsx_register_operand")
5247         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5248                        (match_operand:V16QI 2 "vsx_register_operand")
5249                        (match_operand:QI 3 "const_0_to_12_operand")]
5250                    UNSPEC_XXINSERTW))]
5251   "TARGET_P9_VECTOR"
5252 {
5253   if (!VECTOR_ELT_ORDER_BIG)
5254     {
5255       rtx op1 = operands[1];
5256       rtx v4si_tmp = gen_reg_rtx (V4SImode);
5257       emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5258       operands[1] = v4si_tmp;
5259       operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5260     }
5261 })
5262
;; Emits xxinsertw.  Operand 2 is tied to the destination by the "0"
;; constraint, so the instruction updates that register in place; operand 1
;; is the V4SI source and operand 3 the constant byte offset.
5263 (define_insn "*vinsert4b_internal"
5264   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5265         (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5266                        (match_operand:V16QI 2 "vsx_register_operand" "0")
5267                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
5268                    UNSPEC_XXINSERTW))]
5269   "TARGET_P9_VECTOR"
5270   "xxinsertw %x0,%x1,%3"
5271   [(set_attr "type" "vecperm")])
5272
;; Variant of vinsert4b whose source (operand 1) is a DImode value in a VSX
;; register.  Only the byte offset (operand 3) is adjusted, to 12 minus
;; itself, when VECTOR_ELT_ORDER_BIG is false.
5273 (define_expand "vinsert4b_di"
5274   [(set (match_operand:V16QI 0 "vsx_register_operand")
5275         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand")
5276                        (match_operand:V16QI 2 "vsx_register_operand")
5277                        (match_operand:QI 3 "const_0_to_12_operand")]
5278                    UNSPEC_XXINSERTW))]
5279   "TARGET_P9_VECTOR"
5280 {
5281   if (!VECTOR_ELT_ORDER_BIG)
5282     operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5283 })
5284
;; Emits xxinsertw for a DImode source (operand 1, constraint "wj").
;; Operand 2 is tied to the destination by the "0" constraint; operand 3 is
;; the constant byte offset.
5285 (define_insn "*vinsert4b_di_internal"
5286   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5287         (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj")
5288                        (match_operand:V16QI 2 "vsx_register_operand" "0")
5289                        (match_operand:QI 3 "const_0_to_12_operand" "n")]
5290                    UNSPEC_XXINSERTW))]
5291   "TARGET_P9_VECTOR"
5292   "xxinsertw %x0,%x1,%3"
5293   [(set_attr "type" "vecperm")])
5294
5295 ;; Generate vector extract four float 32 values from left four elements
5296 ;; of eight element vector of float 16 values.
;; Operand 0 is the V4SF result and operand 1 the V8HI source.  The
;; expansion builds a V16QI permute mask from the constant table VALS,
;; permutes operand 1 with itself by that mask, and converts the permuted
;; vector with xvcvhpsp.
5297 (define_expand "vextract_fp_from_shorth"
5298   [(set (match_operand:V4SF 0 "register_operand" "=wa")
5299         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5300    UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5301   "TARGET_P9_VECTOR"
5302 {
5303   int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5304   int i;
5305
5306   rtx rvals[16];
5307   rtx mask = gen_reg_rtx (V16QImode);
5308   rtx tmp = gen_reg_rtx (V16QImode);
5309   rtvec v;
5310
5311   for (i = 0; i < 16; i++)
5312     rvals[i] = GEN_INT (vals[i]);
5313
5314   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5315      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5316      src half words 0,1,2,3 for the conversion instruction.  */
5317   v = gen_rtvec_v (16, rvals);
5318   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5319   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5320                                           operands[1], mask));
5321   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5322   DONE;
5323 })
5324
5325 ;; Generate vector extract four float 32 values from right four elements
5326 ;; of eight element vector of float 16 values.
;; Operand 0 is the V4SF result and operand 1 the V8HI source.  Same
;; sequence as vextract_fp_from_shorth, but the permute mask table VALS
;; selects bytes 0..7 instead of bytes 8..15.
5327 (define_expand "vextract_fp_from_shortl"
5328   [(set (match_operand:V4SF 0 "register_operand" "=wa")
5329         (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5330         UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5331   "TARGET_P9_VECTOR"
5332 {
5333   int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5334   int i;
5335   rtx rvals[16];
5336   rtx mask = gen_reg_rtx (V16QImode);
5337   rtx tmp = gen_reg_rtx (V16QImode);
5338   rtvec v;
5339
5340   for (i = 0; i < 16; i++)
5341     rvals[i] = GEN_INT (vals[i]);
5342
5343   /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5344      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5345      src half words 4,5,6,7 for the conversion instruction.  */
5346   v = gen_rtvec_v (16, rvals);
5347   emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5348   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5349                                           operands[1], mask));
5350   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5351   DONE;
5352 })
5353
5354 ;; Support for ISA 3.0 vector byte reverse
5355
5356 ;; Swap all bytes within a vector
;; Operand 0 receives the bswap of the single 128-bit element in operand 1;
;; both are V1TI values in VSX registers.
5357 (define_insn "p9_xxbrq_v1ti"
5358   [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5359         (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5360   "TARGET_P9_VECTOR"
5361   "xxbrq %x0,%x1"
5362   [(set_attr "type" "vecperm")])
5363
;; V16QI entry point for xxbrq.  Both operands are reinterpreted as V1TI
;; with gen_lowpart and handed to p9_xxbrq_v1ti; operand 0 is the
;; destination and operand 1 the source.
;; NOTE(review): operand 1 carries an "=wa" constraint although it is only
;; read; constraints on a define_expand are presumably ignored -- confirm.
5364 (define_expand "p9_xxbrq_v16qi"
5365   [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5366    (use (match_operand:V16QI 1 "vsx_register_operand" "=wa"))]
5367   "TARGET_P9_VECTOR"
5368 {
5369   rtx op0 = gen_lowpart (V1TImode, operands[0]);
5370   rtx op1 = gen_lowpart (V1TImode, operands[1]);
5371   emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5372   DONE;
5373 })
5374
5375 ;; Swap all bytes in each 64-bit element
;; Operand 0 receives the per-element bswap of operand 1; instantiated for
;; the VSX_D modes (V2DF and V2DI).
5376 (define_insn "p9_xxbrd_<mode>"
5377   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
5378         (bswap:VSX_D (match_operand:VSX_D 1 "vsx_register_operand" "wa")))]
5379   "TARGET_P9_VECTOR"
5380   "xxbrd %x0,%x1"
5381   [(set_attr "type" "vecperm")])
5382
5383 ;; Swap all bytes in each 32-bit element
;; Operand 0 receives the per-element bswap of operand 1; instantiated for
;; the VSX_W modes (V4SF and V4SI).
5384 (define_insn "p9_xxbrw_<mode>"
5385   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
5386         (bswap:VSX_W (match_operand:VSX_W 1 "vsx_register_operand" "wa")))]
5387   "TARGET_P9_VECTOR"
5388   "xxbrw %x0,%x1"
5389   [(set_attr "type" "vecperm")])
5390
5391 ;; Swap all bytes in each element of vector
;; Operand 0 is the result and operand 1 the source, both VEC_REVB vectors.
;; With TARGET_P9_VECTOR the matching p9_xxbr* insn is emitted.  Otherwise
;; the reversal is done with vperm of operand 1 with itself, using the
;; selector returned by swap_endian_selector_for_mode.
5392 (define_expand "revb_<mode>"
5393   [(set (match_operand:VEC_REVB 0 "vsx_register_operand")
5394         (bswap:VEC_REVB (match_operand:VEC_REVB 1 "vsx_register_operand")))]
5395   ""
5396 {
5397   if (TARGET_P9_VECTOR)
5398     emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5399   else
5400     {
5401       /* Want to have the elements in reverse order relative
5402          to the endian mode in use, i.e. in LE mode, put elements
5403          in BE order.  */
5404       rtx sel = swap_endian_selector_for_mode(<MODE>mode);
5405       emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5406                                            operands[1], sel));
5407     }
5408
5409   DONE;
5410 })
5411
5412 ;; Reversing bytes in vector char is just a NOP.
;; Byte elements have nothing to swap, so the expansion is a plain move of
;; operand 1 into operand 0.
5413 (define_expand "revb_v16qi"
5414   [(set (match_operand:V16QI 0 "vsx_register_operand")
5415         (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5416   ""
5417 {
5418   emit_move_insn (operands[0], operands[1]);
5419   DONE;
5420 })
5421
5422 ;; Swap all bytes in each 16-bit element
;; Operand 0 receives the per-element bswap of operand 1; both are V8HI
;; values in VSX registers.
5423 (define_insn "p9_xxbrh_v8hi"
5424   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5425         (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5426   "TARGET_P9_VECTOR"
5427   "xxbrh %x0,%x1"
5428   [(set_attr "type" "vecperm")])
5429 \f
5430
5431 ;; Operand numbers for the following peephole2
5432 (define_constants
5433   [(SFBOOL_TMP_GPR               0)             ;; GPR temporary
5434    (SFBOOL_TMP_VSX               1)             ;; vector temporary
5435    (SFBOOL_MFVSR_D               2)             ;; move to gpr dest
5436    (SFBOOL_MFVSR_A               3)             ;; move to gpr src
5437    (SFBOOL_BOOL_D                4)             ;; and/ior/xor dest
5438    (SFBOOL_BOOL_A1               5)             ;; and/ior/xor arg1
5439    (SFBOOL_BOOL_A2               6)             ;; and/ior/xor arg2
5440    (SFBOOL_SHL_D                 7)             ;; shift left dest
5441    (SFBOOL_SHL_A                 8)             ;; shift left arg
5442    (SFBOOL_MTVSR_D               9)             ;; move to vector dest
5443    (SFBOOL_MFVSR_A_V4SF         10)             ;; SFBOOL_MFVSR_A as V4SFmode
5444    (SFBOOL_BOOL_A_DI            11)             ;; SFBOOL_BOOL_A1/A2 as DImode
5445    (SFBOOL_TMP_VSX_DI           12)             ;; SFBOOL_TMP_VSX as DImode
5446    (SFBOOL_MTVSR_D_V4SF         13)])           ;; SFBOOL_MTVSR_D as V4SFmode
5447
5448 ;; Attempt to optimize some common GLIBC operations using logical operations to
5449 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
5450 ;; after macro expansion that looks like:
5451 ;;
5452 ;;      typedef union {
5453 ;;        float value;
5454 ;;        uint32_t word;
5455 ;;      } ieee_float_shape_type;
5456 ;;
5457 ;;      float t1;
5458 ;;      int32_t is;
5459 ;;
5460 ;;      do {
5461 ;;        ieee_float_shape_type gf_u;
5462 ;;        gf_u.value = (t1);
5463 ;;        (is) = gf_u.word;
5464 ;;      } while (0);
5465 ;;
5466 ;;      do {
5467 ;;        ieee_float_shape_type sf_u;
5468 ;;        sf_u.word = (is & 0xfffff000);
5469 ;;        (t1) = sf_u.value;
5470 ;;      } while (0);
5471 ;;
5472 ;;
5473 ;; This would result in two direct move operations (convert to memory format,
5474 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5475 ;; scalar format).  With this peephole, we eliminate the direct move to the
5476 ;; GPR, and instead move the integer mask value to the vector register after a
5477 ;; shift and do the VSX logical operation.
5478
5479 ;; The insns for dealing with SFmode in GPR registers look like:
5480 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5481 ;;
5482 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5483 ;;
5484 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5485 ;;
5486 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5487 ;;
5488 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5489 ;;
5490 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5491
5492 (define_peephole2
       ;; Scratch registers needed by the replacement sequence: one DImode GPR
       ;; and one V4SFmode VSX register.
5493   [(match_scratch:DI SFBOOL_TMP_GPR "r")
5494    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5495
5496    ;; MFVSRWZ (aka zero_extend)
5497    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5498         (zero_extend:DI
5499          (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5500
5501    ;; AND/IOR/XOR operation on int
5502    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5503         (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5504                         (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5505
5506    ;; SLDI
5507    (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5508         (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5509                    (const_int 32)))
5510
5511    ;; MTVSRD
5512    (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5513         (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5514
5515   "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5516    /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5517       to compare registers when the modes are different.  SFBOOL_MFVSR_A
           is tested too, because the preparation code below takes its
           REGNO.  */
5518    && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5519    && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5520    && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
        && REG_P (operands[SFBOOL_MFVSR_A])
5521    && (REG_P (operands[SFBOOL_BOOL_A2])
5522        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
        /* The MFVSRWZ result is not produced by the replacement, so it must
           either be overwritten by the logical operation or be dead at
           offset 2 of the matched sequence.  */
5523    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5524        || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
        /* The MFVSRWZ result must be one of the logical operation's
           inputs.  */
5525    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5526        || (REG_P (operands[SFBOOL_BOOL_A2])
5527            && REGNO (operands[SFBOOL_MFVSR_D])
5528                 == REGNO (operands[SFBOOL_BOOL_A2])))
        /* If both inputs were the MFVSRWZ result, the preparation code would
           select that GPR as the other operand, but the replacement no
           longer sets it.  Given the test above, equal input registers
           means exactly that case, so reject it.  */
        && !(REG_P (operands[SFBOOL_BOOL_A2])
             && REGNO (operands[SFBOOL_BOOL_A1])
                  == REGNO (operands[SFBOOL_BOOL_A2]))
        /* The shift must consume the logical operation's result, and that
           result must be overwritten by the shift or be dead at offset 3.  */
5529    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5530    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5531        || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
        /* The shifted GPR value is not produced by the replacement either,
           so it must be dead at offset 4.  */
5532    && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
       ;; Replacement: shift the other logical operand left by 32 bits into
       ;; the scratch GPR, copy the scratch GPR to the scratch VSX register
       ;; (referenced as DImode), and do the logical operation in V4SFmode
       ;; on the VSX registers.
5533   [(set (match_dup SFBOOL_TMP_GPR)
5534         (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5535                    (const_int 32)))
5536
5537    (set (match_dup SFBOOL_TMP_VSX_DI)
5538         (match_dup SFBOOL_TMP_GPR))
5539
5540    (set (match_dup SFBOOL_MTVSR_D_V4SF)
5541         (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5542                           (match_dup SFBOOL_TMP_VSX)))]
5543 {
5544   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5545   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5546   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5547   int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5548   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5549   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5550
5551   if (CONST_INT_P (bool_a2))
5552     {
         /* Constant operand: load it into the scratch GPR first, so that the
            shift in the replacement template operates on the scratch GPR in
            place.  */
5553       rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5554       emit_move_insn (tmp_gpr, bool_a2);
5555       operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5556     }
5557   else
5558     {
         /* Register operands: the value to shift is whichever input is not
            the MFVSRWZ result, referenced in DImode.  */
5559       int regno_bool_a1 = REGNO (bool_a1);
5560       int regno_bool_a2 = REGNO (bool_a2);
5561       int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5562                           ? regno_bool_a2 : regno_bool_a1);
5563       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5564     }
5565
       /* Build the alternate-mode views of the VSX registers used by the
          replacement template.  */
5566   operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5567   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5568   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5569 })