2 ;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for 128-bit integer types that go in a single vector register.
41 (define_mode_iterator VSX_TI [TI V1TI])
43 ;; Iterator for the 2 32-bit vector types
44 (define_mode_iterator VSX_W [V4SF V4SI])
46 ;; Iterator for the DF types
47 (define_mode_iterator VSX_DF [V2DF DF])
49 ;; Iterator for vector floating point types supported by VSX
50 (define_mode_iterator VSX_F [V4SF V2DF])
52 ;; Iterator for logical types supported by VSX
53 (define_mode_iterator VSX_L [V16QI
61 (KF "FLOAT128_VECTOR_P (KFmode)")
62 (TF "FLOAT128_VECTOR_P (TFmode)")])
64 ;; Iterator for memory moves.
65 (define_mode_iterator VSX_M [V16QI
72 (KF "FLOAT128_VECTOR_P (KFmode)")
73 (TF "FLOAT128_VECTOR_P (TFmode)")
76 (define_mode_attr VSX_XXBR [(V8HI "h")
83 ;; Map into the appropriate load/store name based on the type
84 (define_mode_attr VSm [(V16QI "vw4")
96 ;; Map into the appropriate suffix based on the type
97 (define_mode_attr VSs [(V16QI "sp")
110 ;; Map the register class used
111 (define_mode_attr VSr [(V16QI "v")
125 ;; Map the register class used for float<->int conversions (floating point side)
126 ;; VSr2 is the preferred register class, VSr3 is any register class that will
128 (define_mode_attr VSr2 [(V2DF "wd")
136 (define_mode_attr VSr3 [(V2DF "wa")
144 ;; Map the register class for sp<->dp float conversions, destination
145 (define_mode_attr VSr4 [(SF "ws")
150 ;; Map the register class for sp<->dp float conversions, source
151 (define_mode_attr VSr5 [(SF "ws")
156 ;; The VSX register class that a type can occupy, even if it is not the
157 ;; preferred register class (VSr is the preferred register class that will get
159 (define_mode_attr VSa [(V16QI "wa")
173 ;; Same size integer type for floating point data
174 (define_mode_attr VSi [(V4SF "v4si")
178 (define_mode_attr VSI [(V4SF "V4SI")
182 ;; Word size for same size conversion
183 (define_mode_attr VSc [(V4SF "w")
187 ;; Map into either s or v, depending on whether this is a scalar or vector
189 (define_mode_attr VSv [(V16QI "v")
199 ;; Appropriate type for add ops (and other simple FP ops)
200 (define_mode_attr VStype_simple [(V2DF "vecdouble")
204 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
208 ;; Appropriate type for multiply ops
209 (define_mode_attr VStype_mul [(V2DF "vecdouble")
213 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
217 ;; Appropriate type for divide ops.
218 (define_mode_attr VStype_div [(V2DF "vecdiv")
222 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
226 ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
228 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
232 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
236 ;; Iterator and modes for sp<->dp conversions
237 ;; Because scalar SF values are represented internally as double, use the
238 ;; V4SF type to represent this rather than SF.
239 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
241 (define_mode_attr VS_spdp_res [(DF "V4SF")
245 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
249 (define_mode_attr VS_spdp_type [(DF "fp")
253 ;; Map the scalar mode for a vector type
254 (define_mode_attr VS_scalar [(V1TI "TI")
262 ;; Map to a double-sized vector mode
263 (define_mode_attr VS_double [(V4SI "V8SI")
269 ;; Map register class for 64-bit element in 128-bit vector for direct moves
271 (define_mode_attr VS_64dm [(V2DF "wk")
274 ;; Map register class for 64-bit element in 128-bit vector for normal register
276 (define_mode_attr VS_64reg [(V2DF "ws")
279 ;; Iterators for loading constants with xxspltib
280 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
281 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
283 ;; Vector reverse byte modes
284 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
286 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
287 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
288 ;; done on ISA 2.07 and not just ISA 3.0.
289 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
290 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
292 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
296 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
297 ;; insert to validate the operand number.
298 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
299 (V8HI "const_0_to_7_operand")
300 (V4SI "const_0_to_3_operand")])
302 ;; Mode attribute to give the constraint for vector extract and insert
304 (define_mode_attr VSX_EX [(V16QI "v")
308 ;; Mode iterator for binary floating types other than double to
309 ;; optimize converting to that floating point type from an extract
310 ;; of an integer type
311 (define_mode_iterator VSX_EXTRACT_FL [SF
312 (IF "FLOAT128_2REG_P (IFmode)")
313 (KF "TARGET_FLOAT128_HW")
314 (TF "FLOAT128_2REG_P (TFmode)
315 || (FLOAT128_IEEE_P (TFmode)
316 && TARGET_FLOAT128_HW)")])
318 ;; Mode iterator for binary floating types that have a direct conversion
319 ;; from 64-bit integer to floating point
320 (define_mode_iterator FL_CONV [SF
322 (KF "TARGET_FLOAT128_HW")
323 (TF "TARGET_FLOAT128_HW
324 && FLOAT128_IEEE_P (TFmode)")])
326 ;; Iterator for the 2 short vector types to do a splat from an integer
327 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
329 ;; Mode attribute to give the count for the splat instruction to splat
330 ;; the value in the 64-bit integer slot
331 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
333 ;; Mode attribute to give the suffix for the splat instruction
334 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
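;; For illustration (a hypothetical template, not a pattern from this file):
;; an output string that used these attributes, such as
;; "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>", would expand to
;; "vspltb %0,%1,7" for V16QI and "vsplth %0,%1,3" for V8HI, splatting the
;; element that overlaps the 64-bit integer slot.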
336 ;; Constants for creating unspecs
337 (define_c_enum "unspec"
354 UNSPEC_VSX_UNS_FLOAT2
356 UNSPEC_VSX_UNS_FLOATE
358 UNSPEC_VSX_UNS_FLOATO
374 UNSPEC_VSX_XVCVDPSXDS
376 UNSPEC_VSX_XVCVDPUXDS
377 UNSPEC_VSX_SIGN_EXTEND
378 UNSPEC_VSX_XVCVSPSXWS
379 UNSPEC_VSX_XVCVSPSXDS
388 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
389 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
422 UNSPEC_VSX_FIRST_MATCH_INDEX
423 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
424 UNSPEC_VSX_FIRST_MISMATCH_INDEX
425 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
430 ;; The patterns for LE permuted loads and stores come before the general
431 ;; VSX moves so they match first.
432 (define_insn_and_split "*vsx_le_perm_load_<mode>"
433 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
434 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
435 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
441 (parallel [(const_int 1) (const_int 0)])))
445 (parallel [(const_int 1) (const_int 0)])))]
447 rtx mem = operands[1];
449 /* Don't apply the swap optimization if we've already performed register
450 allocation and the hard register destination is not in the altivec
452 if ((MEM_ALIGN (mem) >= 128)
453 && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
454 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
456 rtx mem_address = XEXP (mem, 0);
457 enum machine_mode mode = GET_MODE (mem);
459 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
461 /* Replace the source memory address with masked address. */
462 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
463 emit_insn (lvx_set_expr);
466 else if (rs6000_quadword_masked_address_p (mem_address))
468 /* This rtl is already in the form that matches lvx
469 instruction, so leave it alone. */
472 /* Otherwise, fall through to transform into a swapping load. */
474 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
477 [(set_attr "type" "vecload")
478 (set_attr "length" "8")])
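;; The split above turns a single LE load into roughly this two-insn
;; sequence, which is why the pattern's length is 8:
;;     lxvd2x   reg,addr        load with doublewords in BE element order
;;     xxpermdi reg,reg,reg,2   swap the two doublewords back
;; (illustrative sketch; the exact insns come from the matched patterns)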
480 (define_insn_and_split "*vsx_le_perm_load_<mode>"
481 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
482 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
483 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
489 (parallel [(const_int 2) (const_int 3)
490 (const_int 0) (const_int 1)])))
494 (parallel [(const_int 2) (const_int 3)
495 (const_int 0) (const_int 1)])))]
497 rtx mem = operands[1];
499 /* Don't apply the swap optimization if we've already performed register
500 allocation and the hard register destination is not in the altivec
502 if ((MEM_ALIGN (mem) >= 128)
503 && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
504 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
506 rtx mem_address = XEXP (mem, 0);
507 enum machine_mode mode = GET_MODE (mem);
509 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
511 /* Replace the source memory address with masked address. */
512 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
513 emit_insn (lvx_set_expr);
516 else if (rs6000_quadword_masked_address_p (mem_address))
518 /* This rtl is already in the form that matches lvx
519 instruction, so leave it alone. */
522 /* Otherwise, fall through to transform into a swapping load. */
524 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
527 [(set_attr "type" "vecload")
528 (set_attr "length" "8")])
530 (define_insn_and_split "*vsx_le_perm_load_v8hi"
531 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
532 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
533 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
539 (parallel [(const_int 4) (const_int 5)
540 (const_int 6) (const_int 7)
541 (const_int 0) (const_int 1)
542 (const_int 2) (const_int 3)])))
546 (parallel [(const_int 4) (const_int 5)
547 (const_int 6) (const_int 7)
548 (const_int 0) (const_int 1)
549 (const_int 2) (const_int 3)])))]
551 rtx mem = operands[1];
553 /* Don't apply the swap optimization if we've already performed register
554 allocation and the hard register destination is not in the altivec
556 if ((MEM_ALIGN (mem) >= 128)
557 && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
558 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
560 rtx mem_address = XEXP (mem, 0);
561 enum machine_mode mode = GET_MODE (mem);
563 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
565 /* Replace the source memory address with masked address. */
566 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
567 emit_insn (lvx_set_expr);
570 else if (rs6000_quadword_masked_address_p (mem_address))
572 /* This rtl is already in the form that matches lvx
573 instruction, so leave it alone. */
576 /* Otherwise, fall through to transform into a swapping load. */
578 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
581 [(set_attr "type" "vecload")
582 (set_attr "length" "8")])
584 (define_insn_and_split "*vsx_le_perm_load_v16qi"
585 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
586 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
587 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
593 (parallel [(const_int 8) (const_int 9)
594 (const_int 10) (const_int 11)
595 (const_int 12) (const_int 13)
596 (const_int 14) (const_int 15)
597 (const_int 0) (const_int 1)
598 (const_int 2) (const_int 3)
599 (const_int 4) (const_int 5)
600 (const_int 6) (const_int 7)])))
604 (parallel [(const_int 8) (const_int 9)
605 (const_int 10) (const_int 11)
606 (const_int 12) (const_int 13)
607 (const_int 14) (const_int 15)
608 (const_int 0) (const_int 1)
609 (const_int 2) (const_int 3)
610 (const_int 4) (const_int 5)
611 (const_int 6) (const_int 7)])))]
613 rtx mem = operands[1];
615 /* Don't apply the swap optimization if we've already performed register
616 allocation and the hard register destination is not in the altivec
618 if ((MEM_ALIGN (mem) >= 128)
619 && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
620 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
622 rtx mem_address = XEXP (mem, 0);
623 enum machine_mode mode = GET_MODE (mem);
625 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
627 /* Replace the source memory address with masked address. */
628 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
629 emit_insn (lvx_set_expr);
632 else if (rs6000_quadword_masked_address_p (mem_address))
634 /* This rtl is already in the form that matches lvx
635 instruction, so leave it alone. */
638 /* Otherwise, fall through to transform into a swapping load. */
640 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
643 [(set_attr "type" "vecload")
644 (set_attr "length" "8")])
646 (define_insn "*vsx_le_perm_store_<mode>"
647 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
648 (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
649 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
651 [(set_attr "type" "vecstore")
652 (set_attr "length" "12")])
655 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
656 (match_operand:VSX_D 1 "vsx_register_operand"))]
657 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
661 (parallel [(const_int 1) (const_int 0)])))
665 (parallel [(const_int 1) (const_int 0)])))]
667 rtx mem = operands[0];
669 /* Don't apply the swap optimization if we've already performed register
670 allocation and the hard register source is not in the altivec range. */
671 if ((MEM_ALIGN (mem) >= 128)
672 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
673 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
675 rtx mem_address = XEXP (mem, 0);
676 enum machine_mode mode = GET_MODE (mem);
677 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
679 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
680 emit_insn (stvx_set_expr);
683 else if (rs6000_quadword_masked_address_p (mem_address))
685 /* This rtl is already in the form that matches stvx instruction,
686 so leave it alone. */
689 /* Otherwise, fall through to transform into a swapping store. */
692 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
696 ;; The post-reload split requires that we re-permute the source
697 ;; register in case it is still live.
699 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
700 (match_operand:VSX_D 1 "vsx_register_operand"))]
701 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
705 (parallel [(const_int 1) (const_int 0)])))
709 (parallel [(const_int 1) (const_int 0)])))
713 (parallel [(const_int 1) (const_int 0)])))]
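;; An illustrative sketch of the post-reload sequence this split produces
;; (hence "length" 12 on the insn above): permute the source, store it, then
;; permute it back in case the register is still live:
;;     xxpermdi vr,vr,vr,2
;;     stxvd2x  vr,addr
;;     xxpermdi vr,vr,vr,2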
716 (define_insn "*vsx_le_perm_store_<mode>"
717 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
718 (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
719 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
721 [(set_attr "type" "vecstore")
722 (set_attr "length" "12")])
725 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
726 (match_operand:VSX_W 1 "vsx_register_operand"))]
727 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
731 (parallel [(const_int 2) (const_int 3)
732 (const_int 0) (const_int 1)])))
736 (parallel [(const_int 2) (const_int 3)
737 (const_int 0) (const_int 1)])))]
739 rtx mem = operands[0];
741 /* Don't apply the swap optimization if we've already performed register
742 allocation and the hard register source is not in the altivec range. */
743 if ((MEM_ALIGN (mem) >= 128)
744 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
745 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
747 rtx mem_address = XEXP (mem, 0);
748 enum machine_mode mode = GET_MODE (mem);
749 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
751 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
752 emit_insn (stvx_set_expr);
755 else if (rs6000_quadword_masked_address_p (mem_address))
757 /* This rtl is already in the form that matches stvx instruction,
758 so leave it alone. */
761 /* Otherwise, fall through to transform into a swapping store. */
764 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
768 ;; The post-reload split requires that we re-permute the source
769 ;; register in case it is still live.
771 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
772 (match_operand:VSX_W 1 "vsx_register_operand"))]
773 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
777 (parallel [(const_int 2) (const_int 3)
778 (const_int 0) (const_int 1)])))
782 (parallel [(const_int 2) (const_int 3)
783 (const_int 0) (const_int 1)])))
787 (parallel [(const_int 2) (const_int 3)
788 (const_int 0) (const_int 1)])))]
791 (define_insn "*vsx_le_perm_store_v8hi"
792 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
793 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
794 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
796 [(set_attr "type" "vecstore")
797 (set_attr "length" "12")])
800 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
801 (match_operand:V8HI 1 "vsx_register_operand"))]
802 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
806 (parallel [(const_int 4) (const_int 5)
807 (const_int 6) (const_int 7)
808 (const_int 0) (const_int 1)
809 (const_int 2) (const_int 3)])))
813 (parallel [(const_int 4) (const_int 5)
814 (const_int 6) (const_int 7)
815 (const_int 0) (const_int 1)
816 (const_int 2) (const_int 3)])))]
818 rtx mem = operands[0];
820 /* Don't apply the swap optimization if we've already performed register
821 allocation and the hard register source is not in the altivec range. */
822 if ((MEM_ALIGN (mem) >= 128)
823 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
824 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
826 rtx mem_address = XEXP (mem, 0);
827 enum machine_mode mode = GET_MODE (mem);
828 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
830 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
831 emit_insn (stvx_set_expr);
834 else if (rs6000_quadword_masked_address_p (mem_address))
836 /* This rtl is already in the form that matches stvx instruction,
837 so leave it alone. */
840 /* Otherwise, fall through to transform into a swapping store. */
843 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
847 ;; The post-reload split requires that we re-permute the source
848 ;; register in case it is still live.
850 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
851 (match_operand:V8HI 1 "vsx_register_operand"))]
852 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
856 (parallel [(const_int 4) (const_int 5)
857 (const_int 6) (const_int 7)
858 (const_int 0) (const_int 1)
859 (const_int 2) (const_int 3)])))
863 (parallel [(const_int 4) (const_int 5)
864 (const_int 6) (const_int 7)
865 (const_int 0) (const_int 1)
866 (const_int 2) (const_int 3)])))
870 (parallel [(const_int 4) (const_int 5)
871 (const_int 6) (const_int 7)
872 (const_int 0) (const_int 1)
873 (const_int 2) (const_int 3)])))]
876 (define_insn "*vsx_le_perm_store_v16qi"
877 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
878 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
879 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
881 [(set_attr "type" "vecstore")
882 (set_attr "length" "12")])
885 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
886 (match_operand:V16QI 1 "vsx_register_operand"))]
887 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
891 (parallel [(const_int 8) (const_int 9)
892 (const_int 10) (const_int 11)
893 (const_int 12) (const_int 13)
894 (const_int 14) (const_int 15)
895 (const_int 0) (const_int 1)
896 (const_int 2) (const_int 3)
897 (const_int 4) (const_int 5)
898 (const_int 6) (const_int 7)])))
902 (parallel [(const_int 8) (const_int 9)
903 (const_int 10) (const_int 11)
904 (const_int 12) (const_int 13)
905 (const_int 14) (const_int 15)
906 (const_int 0) (const_int 1)
907 (const_int 2) (const_int 3)
908 (const_int 4) (const_int 5)
909 (const_int 6) (const_int 7)])))]
911 rtx mem = operands[0];
913 /* Don't apply the swap optimization if we've already performed register
914 allocation and the hard register source is not in the altivec range. */
915 if ((MEM_ALIGN (mem) >= 128)
916 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
917 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
919 rtx mem_address = XEXP (mem, 0);
920 enum machine_mode mode = GET_MODE (mem);
921 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
923 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
924 emit_insn (stvx_set_expr);
927 else if (rs6000_quadword_masked_address_p (mem_address))
929 /* This rtl is already in the form that matches stvx instruction,
930 so leave it alone. */
933 /* Otherwise, fall through to transform into a swapping store. */
936 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
940 ;; The post-reload split requires that we re-permute the source
941 ;; register in case it is still live.
943 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
944 (match_operand:V16QI 1 "vsx_register_operand"))]
945 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
949 (parallel [(const_int 8) (const_int 9)
950 (const_int 10) (const_int 11)
951 (const_int 12) (const_int 13)
952 (const_int 14) (const_int 15)
953 (const_int 0) (const_int 1)
954 (const_int 2) (const_int 3)
955 (const_int 4) (const_int 5)
956 (const_int 6) (const_int 7)])))
960 (parallel [(const_int 8) (const_int 9)
961 (const_int 10) (const_int 11)
962 (const_int 12) (const_int 13)
963 (const_int 14) (const_int 15)
964 (const_int 0) (const_int 1)
965 (const_int 2) (const_int 3)
966 (const_int 4) (const_int 5)
967 (const_int 6) (const_int 7)])))
971 (parallel [(const_int 8) (const_int 9)
972 (const_int 10) (const_int 11)
973 (const_int 12) (const_int 13)
974 (const_int 14) (const_int 15)
975 (const_int 0) (const_int 1)
976 (const_int 2) (const_int 3)
977 (const_int 4) (const_int 5)
978 (const_int 6) (const_int 7)])))]
981 ;; Little endian word swapping for 128-bit types that are either scalars or the
982 ;; special V1TI container class, for which it is not appropriate to use vec_select
984 (define_insn "*vsx_le_permute_<mode>"
985 [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
987 (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
989 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
991 xxpermdi %x0,%x1,%x1,2
995 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
996 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
997 [(set_attr "length" "4,4,4,8,8,8")
998 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
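;; Note: "xxpermdi %x0,%x1,%x1,2" with immediate 2 places doubleword 1 of the
;; first input in the result's doubleword 0 and doubleword 0 of the second
;; input in doubleword 1; with both inputs the same register this simply
;; swaps the two 64-bit halves, which is the whole LE permute for these modes.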
1000 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
1001 [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
1004 (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
1007 "!BYTES_BIG_ENDIAN && TARGET_VSX"
1012 [(set (match_dup 0) (match_dup 1))]
1014 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
1016 emit_note (NOTE_INSN_DELETED);
1020 [(set_attr "length" "0,4")
1021 (set_attr "type" "veclogical")])
1023 (define_insn_and_split "*vsx_le_perm_load_<mode>"
1024 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
1025 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1026 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1030 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1033 rtx tmp = (can_create_pseudo_p ()
1034 ? gen_reg_rtx_and_attrs (operands[0])
1036 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1037 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1040 [(set_attr "type" "vecload,load")
1041 (set_attr "length" "8,8")])
1043 (define_insn "*vsx_le_perm_store_<mode>"
1044 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1045 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
1046 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1050 [(set_attr "type" "vecstore,store")
1051 (set_attr "length" "12,8")])
1054 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1055 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1056 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
1059 rtx tmp = (can_create_pseudo_p ()
1060 ? gen_reg_rtx_and_attrs (operands[0])
1062 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1063 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1067 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1068 ;; GPR registers on a little endian system.
1070 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1071 (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1073 (set (match_operand:VSX_TI 2 "int_reg_operand")
1074 (rotate:VSX_TI (match_dup 0)
1076 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1077 && (rtx_equal_p (operands[0], operands[2])
1078 || peep2_reg_dead_p (2, operands[0]))"
1079 [(set (match_dup 2) (match_dup 1))])
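;; Sketch: two back-to-back 64-bit rotates of a 128-bit value are an identity,
;; so a permuting load whose result is immediately permuted back (and whose
;; intermediate register dies) collapses to a plain load:
;;     (set r0 (rotate mem 64))
;;     (set r2 (rotate r0 64))    ==>    (set r2 mem)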
1082 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1083 (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1085 (set (match_operand:VSX_TI 2 "memory_operand")
1086 (rotate:VSX_TI (match_dup 0)
1088 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1089 && peep2_reg_dead_p (2, operands[0])"
1090 [(set (match_dup 2) (match_dup 1))])
1092 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1093 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
1094 ;; floating point are handled by the more generic swap elimination pass.
1096 [(set (match_operand:TI 0 "vsx_register_operand")
1097 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1099 (set (match_operand:TI 2 "vsx_register_operand")
1100 (rotate:TI (match_dup 0)
1102 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1103 && (rtx_equal_p (operands[0], operands[2])
1104 || peep2_reg_dead_p (2, operands[0]))"
1105 [(set (match_dup 2) (match_dup 1))])
1107 ;; The post-reload split requires that we re-permute the source
1108 ;; register in case it is still live.
1110 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1111 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1112 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1115 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1116 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1117 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1121 ;; Vector constants that can be generated with XXSPLTIB, which was added in ISA
1122 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1123 (define_insn "xxspltib_v16qi"
1124 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1125 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1128 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1129 return "xxspltib %x0,%2";
1131 [(set_attr "type" "vecperm")])
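;; A hedged usage example (C, assuming ISA 3.0 and the Altivec API): a splat
;; of a small literal can be emitted as one XXSPLTIB:
;;     #include <altivec.h>
;;     vector signed char splat5 (void) { return vec_splats ((signed char) 5); }
;; which this pattern renders as "xxspltib %x0,5".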
1133 (define_insn "xxspltib_<mode>_nosplit"
1134 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1135 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1138 rtx op1 = operands[1];
1142 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1146 operands[2] = GEN_INT (value & 0xff);
1147 return "xxspltib %x0,%2";
1149 [(set_attr "type" "vecperm")])
1151 (define_insn_and_split "*xxspltib_<mode>_split"
1152 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1153 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1161 rtx op0 = operands[0];
1162 rtx op1 = operands[1];
1163 rtx tmp = ((can_create_pseudo_p ())
1164 ? gen_reg_rtx (V16QImode)
1165 : gen_lowpart (V16QImode, op0));
1167 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1171 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1173 if (<MODE>mode == V2DImode)
1174 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1176 else if (<MODE>mode == V4SImode)
1177 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1179 else if (<MODE>mode == V8HImode)
1180 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1187 [(set_attr "type" "vecperm")
1188 (set_attr "length" "8")])
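;; Sketch of the two-insn split (hence "length" 8): for, say, a V8HI vector
;; of 5s, "xxspltib tmp,5" first splats the byte, then "vupkhsb %0,tmp"
;; sign-extends the high bytes to halfwords, reproducing the constant.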
1191 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB
1192 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1193 ;; all 1's, since the machine does not have to wait for the previous
1194 ;; instruction using the register being set (such as a store waiting on a slow
1195 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1197 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1198 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1199 ;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1200 (define_insn "*vsx_mov<mode>_64bit"
1201 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1202 "=ZwO, <VSa>, <VSa>, r, we, ?wQ,
1203 ?&r, ??r, ??Y, ??r, wo, v,
1204 ?<VSa>, *r, v, ??r, wZ, v")
1206 (match_operand:VSX_M 1 "input_operand"
1207 "<VSa>, ZwO, <VSa>, we, r, r,
1208 wQ, Y, r, r, wE, jwM,
1209 ?jwM, jwM, W, W, v, wZ"))]
1211 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1212 && (register_operand (operands[0], <MODE>mode)
1213 || register_operand (operands[1], <MODE>mode))"
1215 return rs6000_output_move_128bit (operands);
1218 "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
1219 store, load, store, *, vecsimple, vecsimple,
1220 vecsimple, *, *, *, vecstore, vecload")
1225 4, 8, 20, 20, 4, 4")])
1227 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1228 ;; XXSPLTIB VSPLTISW VSX 0/-1 GPR 0/-1 VMX const GPR const
1229 ;; LVX (VMX) STVX (VMX)
1230 (define_insn "*vsx_mov<mode>_32bit"
1231 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1232 "=ZwO, <VSa>, <VSa>, ??r, ??Y, ??r,
1233 wo, v, ?<VSa>, *r, v, ??r,
1236 (match_operand:VSX_M 1 "input_operand"
1237 "<VSa>, ZwO, <VSa>, Y, r, r,
1238 wE, jwM, ?jwM, jwM, W, W,
1241 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1242 && (register_operand (operands[0], <MODE>mode)
1243 || register_operand (operands[1], <MODE>mode))"
1245 return rs6000_output_move_128bit (operands);
1248 "vecstore, vecload, vecsimple, load, store, *,
1249 vecsimple, vecsimple, vecsimple, *, *, *,
1253 "4, 4, 4, 16, 16, 16,
1254 4, 4, 4, 16, 20, 32,
1257 ;; Explicit load/store expanders for the builtin functions
1258 (define_expand "vsx_load_<mode>"
1259 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1260 (match_operand:VSX_M 1 "memory_operand"))]
1261 "VECTOR_MEM_VSX_P (<MODE>mode)"
1263 /* Expand to swaps if needed, prior to swap optimization. */
1264 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1266 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1271 (define_expand "vsx_store_<mode>"
1272 [(set (match_operand:VSX_M 0 "memory_operand")
1273 (match_operand:VSX_M 1 "vsx_register_operand"))]
1274 "VECTOR_MEM_VSX_P (<MODE>mode)"
1276 /* Expand to swaps if needed, prior to swap optimization. */
1277 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1279 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1284 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1285 ;; when you really want their element-reversing behavior.
1286 (define_insn "vsx_ld_elemrev_v2di"
1287 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1289 (match_operand:V2DI 1 "memory_operand" "Z")
1290 (parallel [(const_int 1) (const_int 0)])))]
1291 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1293 [(set_attr "type" "vecload")])
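;; Note: on little endian, lxvd2x/stxvd2x themselves access the two
;; doublewords in big-endian element order, so for 64-bit element types the
;; element-reversing builtins map to a single instruction here, with no extra
;; permute needed.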
1295 (define_insn "vsx_ld_elemrev_v1ti"
1296 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1298 (match_operand:V1TI 1 "memory_operand" "Z")
1299 (parallel [(const_int 0)])))]
1300 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1302 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1304 [(set_attr "type" "vecload")])
1306 (define_insn "vsx_ld_elemrev_v2df"
1307 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1309 (match_operand:V2DF 1 "memory_operand" "Z")
1310 (parallel [(const_int 1) (const_int 0)])))]
1311 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1313 [(set_attr "type" "vecload")])
1315 (define_insn "vsx_ld_elemrev_v4si"
1316 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1318 (match_operand:V4SI 1 "memory_operand" "Z")
1319 (parallel [(const_int 3) (const_int 2)
1320 (const_int 1) (const_int 0)])))]
1321 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1323 [(set_attr "type" "vecload")])
1325 (define_insn "vsx_ld_elemrev_v4sf"
1326 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1328 (match_operand:V4SF 1 "memory_operand" "Z")
1329 (parallel [(const_int 3) (const_int 2)
1330 (const_int 1) (const_int 0)])))]
1331 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1333 [(set_attr "type" "vecload")])
1335 (define_expand "vsx_ld_elemrev_v8hi"
1336 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1338 (match_operand:V8HI 1 "memory_operand" "Z")
1339 (parallel [(const_int 7) (const_int 6)
1340 (const_int 5) (const_int 4)
1341 (const_int 3) (const_int 2)
1342 (const_int 1) (const_int 0)])))]
1343 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1345 if (!TARGET_P9_VECTOR)
1347 rtx tmp = gen_reg_rtx (V4SImode);
1348 rtx subreg, subreg2, perm[16], pcv;
1349 /* 2 is leftmost element in register */
1350 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1353 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1354 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1355 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1357 for (i = 0; i < 16; ++i)
1358 perm[i] = GEN_INT (reorder[i]);
1360 pcv = force_reg (V16QImode,
1361 gen_rtx_CONST_VECTOR (V16QImode,
1362 gen_rtvec_v (16, perm)));
1363 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1369 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1370 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1372 (match_operand:V8HI 1 "memory_operand" "Z")
1373 (parallel [(const_int 7) (const_int 6)
1374 (const_int 5) (const_int 4)
1375 (const_int 3) (const_int 2)
1376 (const_int 1) (const_int 0)])))]
1377 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1379 [(set_attr "type" "vecload")])
1381 (define_expand "vsx_ld_elemrev_v16qi"
1382 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1384 (match_operand:V16QI 1 "memory_operand" "Z")
1385 (parallel [(const_int 15) (const_int 14)
1386 (const_int 13) (const_int 12)
1387 (const_int 11) (const_int 10)
1388 (const_int 9) (const_int 8)
1389 (const_int 7) (const_int 6)
1390 (const_int 5) (const_int 4)
1391 (const_int 3) (const_int 2)
1392 (const_int 1) (const_int 0)])))]
1393 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1395 if (!TARGET_P9_VECTOR)
1397 rtx tmp = gen_reg_rtx (V4SImode);
1398 rtx subreg, subreg2, perm[16], pcv;
1399 /* 3 is leftmost element in register */
1400 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1403 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1404 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1405 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1407 for (i = 0; i < 16; ++i)
1408 perm[i] = GEN_INT (reorder[i]);
1410 pcv = force_reg (V16QImode,
1411 gen_rtx_CONST_VECTOR (V16QImode,
1412 gen_rtvec_v (16, perm)));
1413 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1419 (define_insn "*vsx_ld_elemrev_v16qi_internal"
1420 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1422 (match_operand:V16QI 1 "memory_operand" "Z")
1423 (parallel [(const_int 15) (const_int 14)
1424 (const_int 13) (const_int 12)
1425 (const_int 11) (const_int 10)
1426 (const_int 9) (const_int 8)
1427 (const_int 7) (const_int 6)
1428 (const_int 5) (const_int 4)
1429 (const_int 3) (const_int 2)
1430 (const_int 1) (const_int 0)])))]
1431 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1433 [(set_attr "type" "vecload")])
1435 (define_insn "vsx_st_elemrev_v1ti"
1436 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1438 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1439 (parallel [(const_int 0)])))
1440 (clobber (match_dup 1))]
1441 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1443 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1445 [(set_attr "type" "vecstore")])
1447 (define_insn "vsx_st_elemrev_v2df"
1448 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1450 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1451 (parallel [(const_int 1) (const_int 0)])))]
1452 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1454 [(set_attr "type" "vecstore")])
1456 (define_insn "vsx_st_elemrev_v2di"
1457 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1459 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1460 (parallel [(const_int 1) (const_int 0)])))]
1461 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1463 [(set_attr "type" "vecstore")])
1465 (define_insn "vsx_st_elemrev_v4sf"
1466 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1468 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1469 (parallel [(const_int 3) (const_int 2)
1470 (const_int 1) (const_int 0)])))]
1471 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1473 [(set_attr "type" "vecstore")])
1475 (define_insn "vsx_st_elemrev_v4si"
1476 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1478 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1479 (parallel [(const_int 3) (const_int 2)
1480 (const_int 1) (const_int 0)])))]
1481 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1483 [(set_attr "type" "vecstore")])
1485 (define_expand "vsx_st_elemrev_v8hi"
1486 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1488 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1489 (parallel [(const_int 7) (const_int 6)
1490 (const_int 5) (const_int 4)
1491 (const_int 3) (const_int 2)
1492 (const_int 1) (const_int 0)])))]
1493 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1495 if (!TARGET_P9_VECTOR)
1497 rtx mem_subreg, subreg, perm[16], pcv;
1498 rtx tmp = gen_reg_rtx (V8HImode);
1499 /* 2 is leftmost element in register */
1500 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1503 for (i = 0; i < 16; ++i)
1504 perm[i] = GEN_INT (reorder[i]);
1506 pcv = force_reg (V16QImode,
1507 gen_rtx_CONST_VECTOR (V16QImode,
1508 gen_rtvec_v (16, perm)));
1509 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1511 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1512 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1513 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1518 (define_insn "*vsx_st_elemrev_v2di_internal"
1519 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1521 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1522 (parallel [(const_int 1) (const_int 0)])))]
1523 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1525 [(set_attr "type" "vecstore")])
1527 (define_insn "*vsx_st_elemrev_v8hi_internal"
1528 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1530 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1531 (parallel [(const_int 7) (const_int 6)
1532 (const_int 5) (const_int 4)
1533 (const_int 3) (const_int 2)
1534 (const_int 1) (const_int 0)])))]
1535 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1537 [(set_attr "type" "vecstore")])
1539 (define_expand "vsx_st_elemrev_v16qi"
1540 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1542 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1543 (parallel [(const_int 15) (const_int 14)
1544 (const_int 13) (const_int 12)
1545 (const_int 11) (const_int 10)
1546 (const_int 9) (const_int 8)
1547 (const_int 7) (const_int 6)
1548 (const_int 5) (const_int 4)
1549 (const_int 3) (const_int 2)
1550 (const_int 1) (const_int 0)])))]
1551 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1553 if (!TARGET_P9_VECTOR)
1555 rtx mem_subreg, subreg, perm[16], pcv;
1556 rtx tmp = gen_reg_rtx (V16QImode);
1557 /* 3 is leftmost element in register */
1558 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1561 for (i = 0; i < 16; ++i)
1562 perm[i] = GEN_INT (reorder[i]);
1564 pcv = force_reg (V16QImode,
1565 gen_rtx_CONST_VECTOR (V16QImode,
1566 gen_rtvec_v (16, perm)));
1567 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1569 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1570 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1571 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1576 (define_insn "*vsx_st_elemrev_v16qi_internal"
1577 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1579 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1580 (parallel [(const_int 15) (const_int 14)
1581 (const_int 13) (const_int 12)
1582 (const_int 11) (const_int 10)
1583 (const_int 9) (const_int 8)
1584 (const_int 7) (const_int 6)
1585 (const_int 5) (const_int 4)
1586 (const_int 3) (const_int 2)
1587 (const_int 1) (const_int 0)])))]
1588 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1590 [(set_attr "type" "vecstore")])
1593 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1594 ;; instructions are now combined with the insn for the traditional floating
1596 (define_insn "*vsx_add<mode>3"
1597 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1598 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1599 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1600 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1601 "xvadd<VSs> %x0,%x1,%x2"
1602 [(set_attr "type" "<VStype_simple>")
1603 (set_attr "fp_type" "<VSfptype_simple>")])
1605 (define_insn "*vsx_sub<mode>3"
1606 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1607 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1608 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1609 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1610 "xvsub<VSs> %x0,%x1,%x2"
1611 [(set_attr "type" "<VStype_simple>")
1612 (set_attr "fp_type" "<VSfptype_simple>")])
1614 (define_insn "*vsx_mul<mode>3"
1615 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1616 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1617 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1618 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1619 "xvmul<VSs> %x0,%x1,%x2"
1620 [(set_attr "type" "<VStype_simple>")
1621 (set_attr "fp_type" "<VSfptype_mul>")])
1623 ;; Emulate vector with scalar for vec_mul in V2DImode
1624 (define_insn_and_split "vsx_mul_v2di"
1625 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1626 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1627 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1629 "VECTOR_MEM_VSX_P (V2DImode)"
1631 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1634 rtx op0 = operands[0];
1635 rtx op1 = operands[1];
1636 rtx op2 = operands[2];
1637 rtx op3 = gen_reg_rtx (DImode);
1638 rtx op4 = gen_reg_rtx (DImode);
1639 rtx op5 = gen_reg_rtx (DImode);
1640 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1641 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1642 if (TARGET_POWERPC64)
1643 emit_insn (gen_muldi3 (op5, op3, op4));
1646 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1647 emit_move_insn (op5, ret);
1649 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1650 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1651 if (TARGET_POWERPC64)
1652 emit_insn (gen_muldi3 (op3, op3, op4));
1655 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1656 emit_move_insn (op3, ret);
1658 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1661 [(set_attr "type" "mul")])
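;; A rough C model of the split above (illustrative only, not the builtin API):
;;     vector long long mul_v2di (vector long long a, vector long long b)
;;     {
;;       vector long long r;
;;       r[0] = a[0] * b[0];   /* mulld on 64-bit targets */
;;       r[1] = a[1] * b[1];   /* otherwise a library multiply */
;;       return r;
;;     }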
1663 (define_insn "*vsx_div<mode>3"
1664 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1665 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1666 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1667 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1668 "xvdiv<VSs> %x0,%x1,%x2"
1669 [(set_attr "type" "<VStype_div>")
1670 (set_attr "fp_type" "<VSfptype_div>")])
1672 ;; Emulate vector with scalar for vec_div in V2DImode
1673 (define_insn_and_split "vsx_div_v2di"
1674 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1675 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1676 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1678 "VECTOR_MEM_VSX_P (V2DImode)"
1680 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1683 rtx op0 = operands[0];
1684 rtx op1 = operands[1];
1685 rtx op2 = operands[2];
1686 rtx op3 = gen_reg_rtx (DImode);
1687 rtx op4 = gen_reg_rtx (DImode);
1688 rtx op5 = gen_reg_rtx (DImode);
1689 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1690 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1691 if (TARGET_POWERPC64)
1692 emit_insn (gen_divdi3 (op5, op3, op4));
1695 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1696 rtx target = emit_library_call_value (libfunc,
1697 op5, LCT_NORMAL, DImode,
1700 emit_move_insn (op5, target);
1702 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1703 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1704 if (TARGET_POWERPC64)
1705 emit_insn (gen_divdi3 (op3, op3, op4));
1708 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1709 rtx target = emit_library_call_value (libfunc,
1710 op3, LCT_NORMAL, DImode,
1713 emit_move_insn (op3, target);
1715 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1718 [(set_attr "type" "div")])
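;; As above, but for signed division; note that on 32-bit targets each 64-bit
;; element divide becomes a library call (the sdiv_optab libfunc, normally
;; __divdi3 from libgcc) rather than an inline divd.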
1720 (define_insn_and_split "vsx_udiv_v2di"
1721 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1722 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1723 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1725 "VECTOR_MEM_VSX_P (V2DImode)"
1727 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1730 rtx op0 = operands[0];
1731 rtx op1 = operands[1];
1732 rtx op2 = operands[2];
1733 rtx op3 = gen_reg_rtx (DImode);
1734 rtx op4 = gen_reg_rtx (DImode);
1735 rtx op5 = gen_reg_rtx (DImode);
1736 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1737 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1738 if (TARGET_POWERPC64)
1739 emit_insn (gen_udivdi3 (op5, op3, op4));
1742 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1743 rtx target = emit_library_call_value (libfunc,
1744 op5, LCT_NORMAL, DImode,
1747 emit_move_insn (op5, target);
1749 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1750 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1751 if (TARGET_POWERPC64)
1752 emit_insn (gen_udivdi3 (op3, op3, op4));
1755 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1756 rtx target = emit_library_call_value (libfunc,
1757 op3, LCT_NORMAL, DImode,
1760 emit_move_insn (op3, target);
1762 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1765 [(set_attr "type" "div")])
1767 ;; *tdiv* instruction returning the FG flag
1768 (define_expand "vsx_tdiv<mode>3_fg"
1770 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1771 (match_operand:VSX_B 2 "vsx_register_operand")]
1773 (set (match_operand:SI 0 "gpc_reg_operand")
1774 (gt:SI (match_dup 3)
1776 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1778 operands[3] = gen_reg_rtx (CCFPmode);
1781 ;; *tdiv* instruction returning the FE flag
1782 (define_expand "vsx_tdiv<mode>3_fe"
1784 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1785 (match_operand:VSX_B 2 "vsx_register_operand")]
1787 (set (match_operand:SI 0 "gpc_reg_operand")
1788 (eq:SI (match_dup 3)
1790 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1792 operands[3] = gen_reg_rtx (CCFPmode);
1795 (define_insn "*vsx_tdiv<mode>3_internal"
1796 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1797 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1798 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1800 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1801 "x<VSv>tdiv<VSs> %0,%x1,%x2"
1802 [(set_attr "type" "<VStype_simple>")
1803 (set_attr "fp_type" "<VSfptype_simple>")])
1805 (define_insn "vsx_fre<mode>2"
1806 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1807 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1809 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1811 [(set_attr "type" "<VStype_simple>")
1812 (set_attr "fp_type" "<VSfptype_simple>")])
1814 (define_insn "*vsx_neg<mode>2"
1815 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1816 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1817 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1818 "xvneg<VSs> %x0,%x1"
1819 [(set_attr "type" "<VStype_simple>")
1820 (set_attr "fp_type" "<VSfptype_simple>")])
1822 (define_insn "*vsx_abs<mode>2"
1823 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1824 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1825 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1826 "xvabs<VSs> %x0,%x1"
1827 [(set_attr "type" "<VStype_simple>")
1828 (set_attr "fp_type" "<VSfptype_simple>")])
1830 (define_insn "vsx_nabs<mode>2"
1831 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1834 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1835 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1836 "xvnabs<VSs> %x0,%x1"
1837 [(set_attr "type" "<VStype_simple>")
1838 (set_attr "fp_type" "<VSfptype_simple>")])
1840 (define_insn "vsx_smax<mode>3"
1841 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1842 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1843 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1844 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1845 "xvmax<VSs> %x0,%x1,%x2"
1846 [(set_attr "type" "<VStype_simple>")
1847 (set_attr "fp_type" "<VSfptype_simple>")])
1849 (define_insn "*vsx_smin<mode>3"
1850 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1851 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1852 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1853 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1854 "xvmin<VSs> %x0,%x1,%x2"
1855 [(set_attr "type" "<VStype_simple>")
1856 (set_attr "fp_type" "<VSfptype_simple>")])
1858 (define_insn "*vsx_sqrt<mode>2"
1859 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1860 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1861 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1862 "xvsqrt<VSs> %x0,%x1"
1863 [(set_attr "type" "<VStype_sqrt>")
1864 (set_attr "fp_type" "<VSfptype_sqrt>")])
1866 (define_insn "*vsx_rsqrte<mode>2"
1867 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1868 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1870 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1871 "xvrsqrte<VSs> %x0,%x1"
1872 [(set_attr "type" "<VStype_simple>")
1873 (set_attr "fp_type" "<VSfptype_simple>")])
1875 ;; *tsqrt* returning the fg flag
1876 (define_expand "vsx_tsqrt<mode>2_fg"
1878 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1880 (set (match_operand:SI 0 "gpc_reg_operand")
1881 (gt:SI (match_dup 2)
1883 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1885 operands[2] = gen_reg_rtx (CCFPmode);
1888 ;; *tsqrt* returning the fe flag
1889 (define_expand "vsx_tsqrt<mode>2_fe"
1891 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1893 (set (match_operand:SI 0 "gpc_reg_operand")
1894 (eq:SI (match_dup 2)
1896 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1898 operands[2] = gen_reg_rtx (CCFPmode);
1901 (define_insn "*vsx_tsqrt<mode>2_internal"
1902 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1903 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1905 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1906 "x<VSv>tsqrt<VSs> %0,%x1"
1907 [(set_attr "type" "<VStype_simple>")
1908 (set_attr "fp_type" "<VSfptype_simple>")])
1910 ;; Fused vector multiply/add instructions. Support the classical Altivec
1911 ;; versions of fma, which allow the target to be a separate register from the
1912 ;; 3 inputs. Under VSX, the target must be either the addend or the first
1915 (define_insn "*vsx_fmav4sf4"
1916 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1918 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1919 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1920 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1921 "VECTOR_UNIT_VSX_P (V4SFmode)"
1923 xvmaddasp %x0,%x1,%x2
1924 xvmaddmsp %x0,%x1,%x3
1925 xvmaddasp %x0,%x1,%x2
1926 xvmaddmsp %x0,%x1,%x3
1927 vmaddfp %0,%1,%2,%3"
1928 [(set_attr "type" "vecfloat")])
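;; Note on the "a" vs. "m" forms above: xvmaddasp overwrites the addend
;; (operand 3 is tied to the output), while xvmaddmsp overwrites the second
;; multiplicand (operand 2 is tied); that is how the constraint alternatives
;; satisfy the VSX requirement described in the comment above.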
1930 (define_insn "*vsx_fmav2df4"
1931 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1933 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1934 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1935 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1936 "VECTOR_UNIT_VSX_P (V2DFmode)"
1938 xvmaddadp %x0,%x1,%x2
1939 xvmaddmdp %x0,%x1,%x3
1940 xvmaddadp %x0,%x1,%x2
1941 xvmaddmdp %x0,%x1,%x3"
1942 [(set_attr "type" "vecdouble")])
1944 (define_insn "*vsx_fms<mode>4"
1945 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1947 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1948 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1950 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1951 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1953 xvmsuba<VSs> %x0,%x1,%x2
1954 xvmsubm<VSs> %x0,%x1,%x3
1955 xvmsuba<VSs> %x0,%x1,%x2
1956 xvmsubm<VSs> %x0,%x1,%x3"
1957 [(set_attr "type" "<VStype_mul>")])
1959 (define_insn "*vsx_nfma<mode>4"
1960 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1963 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1964 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1965 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1966 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1968 xvnmadda<VSs> %x0,%x1,%x2
1969 xvnmaddm<VSs> %x0,%x1,%x3
1970 xvnmadda<VSs> %x0,%x1,%x2
1971 xvnmaddm<VSs> %x0,%x1,%x3"
1972 [(set_attr "type" "<VStype_mul>")
1973 (set_attr "fp_type" "<VSfptype_mul>")])
1975 (define_insn "*vsx_nfmsv4sf4"
1976 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1979 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1980 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1982 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1983 "VECTOR_UNIT_VSX_P (V4SFmode)"
1985 xvnmsubasp %x0,%x1,%x2
1986 xvnmsubmsp %x0,%x1,%x3
1987 xvnmsubasp %x0,%x1,%x2
1988 xvnmsubmsp %x0,%x1,%x3
1989 vnmsubfp %0,%1,%2,%3"
1990 [(set_attr "type" "vecfloat")])
1992 (define_insn "*vsx_nfmsv2df4"
1993 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1996 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1997 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1999 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
2000 "VECTOR_UNIT_VSX_P (V2DFmode)"
2002 xvnmsubadp %x0,%x1,%x2
2003 xvnmsubmdp %x0,%x1,%x3
2004 xvnmsubadp %x0,%x1,%x2
2005 xvnmsubmdp %x0,%x1,%x3"
2006 [(set_attr "type" "vecdouble")])
2008 ;; Vector conditional expressions (no scalar version for these instructions)
2009 (define_insn "vsx_eq<mode>"
2010 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2011 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2012 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2013 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2014 "xvcmpeq<VSs> %x0,%x1,%x2"
2015 [(set_attr "type" "<VStype_simple>")
2016 (set_attr "fp_type" "<VSfptype_simple>")])
2018 (define_insn "vsx_gt<mode>"
2019 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2020 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2021 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2022 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2023 "xvcmpgt<VSs> %x0,%x1,%x2"
2024 [(set_attr "type" "<VStype_simple>")
2025 (set_attr "fp_type" "<VSfptype_simple>")])
2027 (define_insn "*vsx_ge<mode>"
2028 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2029 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2030 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2031 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2032 "xvcmpge<VSs> %x0,%x1,%x2"
2033 [(set_attr "type" "<VStype_simple>")
2034 (set_attr "fp_type" "<VSfptype_simple>")])
2036 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2037 ;; indicate a combined status
2038 (define_insn "*vsx_eq_<mode>_p"
2039 [(set (reg:CC CR6_REGNO)
2041 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2042 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2044 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2045 (eq:VSX_F (match_dup 1)
2047 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2048 "xvcmpeq<VSs>. %x0,%x1,%x2"
2049 [(set_attr "type" "<VStype_simple>")])
2051 (define_insn "*vsx_gt_<mode>_p"
2052 [(set (reg:CC CR6_REGNO)
2054 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2055 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2057 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2058 (gt:VSX_F (match_dup 1)
2060 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2061 "xvcmpgt<VSs>. %x0,%x1,%x2"
2062 [(set_attr "type" "<VStype_simple>")])
2064 (define_insn "*vsx_ge_<mode>_p"
2065 [(set (reg:CC CR6_REGNO)
2067 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2068 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2070 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2071 (ge:VSX_F (match_dup 1)
2073 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2074 "xvcmpge<VSs>. %x0,%x1,%x2"
2075 [(set_attr "type" "<VStype_simple>")])
2078 (define_insn "*vsx_xxsel<mode>"
2079 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2081 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2082 (match_operand:VSX_L 4 "zero_constant" ""))
2083 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2084 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2085 "VECTOR_MEM_VSX_P (<MODE>mode)"
2086 "xxsel %x0,%x3,%x2,%x1"
2087 [(set_attr "type" "vecmove")])
2089 (define_insn "*vsx_xxsel<mode>_uns"
2090 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2092 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2093 (match_operand:VSX_L 4 "zero_constant" ""))
2094 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2095 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2096 "VECTOR_MEM_VSX_P (<MODE>mode)"
2097 "xxsel %x0,%x3,%x2,%x1"
2098 [(set_attr "type" "vecmove")])
2101 (define_insn "vsx_copysign<mode>3"
2102 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2104 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2105 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2107 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2108 "xvcpsgn<VSs> %x0,%x2,%x1"
2109 [(set_attr "type" "<VStype_simple>")
2110 (set_attr "fp_type" "<VSfptype_simple>")])
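;; Illustrative note (not md syntax): lane-wise, the copysign pattern above
;; behaves like the C sketch below.  The template swaps %x2 and %x1 because
;; xvcpsgn takes the sign from its first source and the magnitude from its
;; second.
;;
;;   for (int i = 0; i < n; i++)        /* n = lanes; a = op1, b = op2 */
;;     dst[i] = copysignf (a[i], b[i]);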
2112 ;; For the conversions, limit the register class for the integer value to be
2113 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2114 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2115 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2116 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2117 ;; in allowing virtual registers.
2118 (define_insn "vsx_float<VSi><mode>2"
2119 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2120 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2121 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2122 "xvcvsx<VSc><VSs> %x0,%x1"
2123 [(set_attr "type" "<VStype_simple>")
2124 (set_attr "fp_type" "<VSfptype_simple>")])
2126 (define_insn "vsx_floatuns<VSi><mode>2"
2127 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2128 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2129 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2130 "xvcvux<VSc><VSs> %x0,%x1"
2131 [(set_attr "type" "<VStype_simple>")
2132 (set_attr "fp_type" "<VSfptype_simple>")])
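;; Illustrative sketch (not md syntax): the two conversion patterns above are
;; lane-wise int -> float converts, e.g. for V4SI -> V4SF:
;;
;;   for (int i = 0; i < 4; i++)
;;     dst[i] = (float) src[i];         /* xvcvsxwsp; xvcvuxwsp for unsigned */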
2134 (define_insn "vsx_fix_trunc<mode><VSi>2"
2135 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2136 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2137 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2138 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2139 [(set_attr "type" "<VStype_simple>")
2140 (set_attr "fp_type" "<VSfptype_simple>")])
2142 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2143 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2144 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2145 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2146 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2147 [(set_attr "type" "<VStype_simple>")
2148 (set_attr "fp_type" "<VSfptype_simple>")])
2150 ;; Math rounding functions
2151 (define_insn "vsx_x<VSv>r<VSs>i"
2152 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2153 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2154 UNSPEC_VSX_ROUND_I))]
2155 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2156 "x<VSv>r<VSs>i %x0,%x1"
2157 [(set_attr "type" "<VStype_simple>")
2158 (set_attr "fp_type" "<VSfptype_simple>")])
2160 (define_insn "vsx_x<VSv>r<VSs>ic"
2161 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2162 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2163 UNSPEC_VSX_ROUND_IC))]
2164 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2165 "x<VSv>r<VSs>ic %x0,%x1"
2166 [(set_attr "type" "<VStype_simple>")
2167 (set_attr "fp_type" "<VSfptype_simple>")])
2169 (define_insn "vsx_btrunc<mode>2"
2170 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2171 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2172 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2173 "xvr<VSs>iz %x0,%x1"
2174 [(set_attr "type" "<VStype_simple>")
2175 (set_attr "fp_type" "<VSfptype_simple>")])
2177 (define_insn "*vsx_b2trunc<mode>2"
2178 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2179 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2181 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2182 "x<VSv>r<VSs>iz %x0,%x1"
2183 [(set_attr "type" "<VStype_simple>")
2184 (set_attr "fp_type" "<VSfptype_simple>")])
2186 (define_insn "vsx_floor<mode>2"
2187 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2188 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2190 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2191 "xvr<VSs>im %x0,%x1"
2192 [(set_attr "type" "<VStype_simple>")
2193 (set_attr "fp_type" "<VSfptype_simple>")])
2195 (define_insn "vsx_ceil<mode>2"
2196 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2197 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2199 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2200 "xvr<VSs>ip %x0,%x1"
2201 [(set_attr "type" "<VStype_simple>")
2202 (set_attr "fp_type" "<VSfptype_simple>")])
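;; For reference (illustrative only), the round-to-integral patterns above map
;; lane-wise onto C roughly as:
;;
;;   dst[i] = truncf (src[i]);          /* xvr<VSs>iz (btrunc) */
;;   dst[i] = floorf (src[i]);          /* xvr<VSs>im (floor)  */
;;   dst[i] = ceilf (src[i]);           /* xvr<VSs>ip (ceil)   */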
2205 ;; VSX convert to/from double vector
2207 ;; Convert between single and double precision
2208 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2209 ;; scalar single precision instructions internally use the double format.
2210 ;; Prefer the altivec registers, since we likely will need to do a vperm
2211 (define_insn "vsx_<VS_spdp_insn>"
2212 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2213 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2214 UNSPEC_VSX_CVSPDP))]
2215 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2216 "<VS_spdp_insn> %x0,%x1"
2217 [(set_attr "type" "<VS_spdp_type>")])
2219 ;; xscvspdp, with the scalar single precision source represented as V4SF
2220 (define_insn "vsx_xscvspdp"
2221 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2222 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2223 UNSPEC_VSX_CVSPDP))]
2224 "VECTOR_UNIT_VSX_P (V4SFmode)"
2226 [(set_attr "type" "fp")])
2228 ;; Same as vsx_xscvspdp, but use SF as the type
2229 (define_insn "vsx_xscvspdp_scalar2"
2230 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2231 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2232 UNSPEC_VSX_CVSPDP))]
2233 "VECTOR_UNIT_VSX_P (V4SFmode)"
2235 [(set_attr "type" "fp")])
2237 ;; Generate xvcvhpsp instruction
2238 (define_insn "vsx_xvcvhpsp"
2239 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2240 (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2241 UNSPEC_VSX_CVHPSP))]
2244 [(set_attr "type" "vecfloat")])
2246 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2247 ;; format of scalars is actually DF.
2248 (define_insn "vsx_xscvdpsp_scalar"
2249 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2250 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2251 UNSPEC_VSX_CVSPDP))]
2252 "VECTOR_UNIT_VSX_P (V4SFmode)"
2254 [(set_attr "type" "fp")])
2256 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2257 (define_insn "vsx_xscvdpspn"
2258 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2259 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2260 UNSPEC_VSX_CVDPSPN))]
2263 [(set_attr "type" "fp")])
2265 (define_insn "vsx_xscvspdpn"
2266 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2267 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2268 UNSPEC_VSX_CVSPDPN))]
2271 [(set_attr "type" "fp")])
2273 (define_insn "vsx_xscvdpspn_scalar"
2274 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2275 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2276 UNSPEC_VSX_CVDPSPN))]
2279 [(set_attr "type" "fp")])
2281 ;; Used by direct move to move a SFmode value from GPR to VSX register
2282 (define_insn "vsx_xscvspdpn_directmove"
2283 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2284 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2285 UNSPEC_VSX_CVSPDPN))]
2288 [(set_attr "type" "fp")])
2290 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2292 (define_expand "vsx_xvcvsxddp_scale"
2293 [(match_operand:V2DF 0 "vsx_register_operand")
2294 (match_operand:V2DI 1 "vsx_register_operand")
2295 (match_operand:QI 2 "immediate_operand")]
2296 "VECTOR_UNIT_VSX_P (V2DFmode)"
2298 rtx op0 = operands[0];
2299 rtx op1 = operands[1];
2300 int scale = INTVAL (operands[2]);
2301 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2303 rs6000_scale_v2df (op0, op0, -scale);
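;; Illustrative C sketch (not md syntax) of the scaled conversion above, as
;; used by vec_ctf: convert each signed doubleword to double, then multiply
;; by 2**-scale.
;;
;;   for (int i = 0; i < 2; i++)
;;     dst[i] = ldexp ((double) src[i], -scale);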
2307 (define_insn "vsx_xvcvsxddp"
2308 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2309 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2310 UNSPEC_VSX_XVCVSXDDP))]
2311 "VECTOR_UNIT_VSX_P (V2DFmode)"
2313 [(set_attr "type" "vecdouble")])
2315 (define_expand "vsx_xvcvuxddp_scale"
2316 [(match_operand:V2DF 0 "vsx_register_operand")
2317 (match_operand:V2DI 1 "vsx_register_operand")
2318 (match_operand:QI 2 "immediate_operand")]
2319 "VECTOR_UNIT_VSX_P (V2DFmode)"
2321 rtx op0 = operands[0];
2322 rtx op1 = operands[1];
2323 int scale = INTVAL (operands[2]);
2324 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2326 rs6000_scale_v2df (op0, op0, -scale);
2330 (define_insn "vsx_xvcvuxddp"
2331 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2332 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2333 UNSPEC_VSX_XVCVUXDDP))]
2334 "VECTOR_UNIT_VSX_P (V2DFmode)"
2336 [(set_attr "type" "vecdouble")])
2338 (define_expand "vsx_xvcvdpsxds_scale"
2339 [(match_operand:V2DI 0 "vsx_register_operand")
2340 (match_operand:V2DF 1 "vsx_register_operand")
2341 (match_operand:QI 2 "immediate_operand")]
2342 "VECTOR_UNIT_VSX_P (V2DFmode)"
2344 rtx op0 = operands[0];
2345 rtx op1 = operands[1];
2347 int scale = INTVAL (operands[2]);
2352 tmp = gen_reg_rtx (V2DFmode);
2353 rs6000_scale_v2df (tmp, op1, scale);
2355 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
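;; Illustrative C sketch (not md syntax) of the vec_cts-style conversion
;; above: scale each double by 2**scale first, then truncate to a signed
;; doubleword.
;;
;;   for (int i = 0; i < 2; i++)
;;     dst[i] = (long long) ldexp (src[i], scale);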
2359 ;; convert vector of 64-bit floating point numbers to vector of
2360 ;; 64-bit signed integer
2361 (define_insn "vsx_xvcvdpsxds"
2362 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2363 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2364 UNSPEC_VSX_XVCVDPSXDS))]
2365 "VECTOR_UNIT_VSX_P (V2DFmode)"
2366 "xvcvdpsxds %x0,%x1"
2367 [(set_attr "type" "vecdouble")])
2369 ;; convert vector of 32-bit floating point numbers to vector of
2370 ;; 32-bit signed integer
2371 (define_insn "vsx_xvcvspsxws"
2372 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2373 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2374 UNSPEC_VSX_XVCVSPSXWS))]
2375 "VECTOR_UNIT_VSX_P (V4SFmode)"
2376 "xvcvspsxws %x0,%x1"
2377 [(set_attr "type" "vecfloat")])
2379 ;; convert vector of 64-bit floating point numbers to vector of
2380 ;; 64-bit unsigned integer
2381 (define_expand "vsx_xvcvdpuxds_scale"
2382 [(match_operand:V2DI 0 "vsx_register_operand")
2383 (match_operand:V2DF 1 "vsx_register_operand")
2384 (match_operand:QI 2 "immediate_operand")]
2385 "VECTOR_UNIT_VSX_P (V2DFmode)"
2387 rtx op0 = operands[0];
2388 rtx op1 = operands[1];
2390 int scale = INTVAL (operands[2]);
2395 tmp = gen_reg_rtx (V2DFmode);
2396 rs6000_scale_v2df (tmp, op1, scale);
2398 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2402 ;; convert vector of 32-bit floating point numbers to vector of
2403 ;; 32-bit unsigned integer
2404 (define_insn "vsx_xvcvspuxws"
2405 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2406 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2407 UNSPEC_VSX_XVCVSPSXWS))]
2408 "VECTOR_UNIT_VSX_P (V4SFmode)"
2409 "xvcvspuxws %x0,%x1"
2410 [(set_attr "type" "vecfloat")])
2412 (define_insn "vsx_xvcvdpuxds"
2413 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2414 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2415 UNSPEC_VSX_XVCVDPUXDS))]
2416 "VECTOR_UNIT_VSX_P (V2DFmode)"
2417 "xvcvdpuxds %x0,%x1"
2418 [(set_attr "type" "vecdouble")])
2420 ;; Convert from 64-bit to 32-bit types
2421 ;; Note, favor the Altivec registers since the usual use of these instructions
2422 ;; is in vector converts and we need to use the Altivec vperm instruction.
2424 (define_insn "vsx_xvcvdpsxws"
2425 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2426 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2427 UNSPEC_VSX_CVDPSXWS))]
2428 "VECTOR_UNIT_VSX_P (V2DFmode)"
2429 "xvcvdpsxws %x0,%x1"
2430 [(set_attr "type" "vecdouble")])
2432 (define_insn "vsx_xvcvdpuxws"
2433 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2434 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2435 UNSPEC_VSX_CVDPUXWS))]
2436 "VECTOR_UNIT_VSX_P (V2DFmode)"
2437 "xvcvdpuxws %x0,%x1"
2438 [(set_attr "type" "vecdouble")])
2440 (define_insn "vsx_xvcvsxdsp"
2441 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2442 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2443 UNSPEC_VSX_CVSXDSP))]
2444 "VECTOR_UNIT_VSX_P (V2DFmode)"
2446 [(set_attr "type" "vecfloat")])
2448 (define_insn "vsx_xvcvuxdsp"
2449 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2450 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2451 UNSPEC_VSX_CVUXDSP))]
2452 "VECTOR_UNIT_VSX_P (V2DFmode)"
2454 [(set_attr "type" "vecdouble")])
2456 (define_insn "vsx_xvcdpsp"
2457 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2458 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2459 UNSPEC_VSX_XVCDPSP))]
2460 "VECTOR_UNIT_VSX_P (V2DFmode)"
2462 [(set_attr "type" "vecdouble")])
2464 ;; Convert from 32-bit to 64-bit types
2465 ;; Provide both vector and scalar targets
2466 (define_insn "vsx_xvcvsxwdp"
2467 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2468 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2469 UNSPEC_VSX_CVSXWDP))]
2470 "VECTOR_UNIT_VSX_P (V2DFmode)"
2472 [(set_attr "type" "vecdouble")])
2474 (define_insn "vsx_xvcvsxwdp_df"
2475 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2476 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2477 UNSPEC_VSX_CVSXWDP))]
2480 [(set_attr "type" "vecdouble")])
2482 (define_insn "vsx_xvcvuxwdp"
2483 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2484 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2485 UNSPEC_VSX_CVUXWDP))]
2486 "VECTOR_UNIT_VSX_P (V2DFmode)"
2488 [(set_attr "type" "vecdouble")])
2490 (define_insn "vsx_xvcvuxwdp_df"
2491 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2492 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2493 UNSPEC_VSX_CVUXWDP))]
2496 [(set_attr "type" "vecdouble")])
2498 (define_insn "vsx_xvcvspsxds"
2499 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2500 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2501 UNSPEC_VSX_CVSPSXDS))]
2502 "VECTOR_UNIT_VSX_P (V2DFmode)"
2503 "xvcvspsxds %x0,%x1"
2504 [(set_attr "type" "vecdouble")])
2506 (define_insn "vsx_xvcvspuxds"
2507 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2508 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2509 UNSPEC_VSX_CVSPUXDS))]
2510 "VECTOR_UNIT_VSX_P (V2DFmode)"
2511 "xvcvspuxds %x0,%x1"
2512 [(set_attr "type" "vecdouble")])
2514 (define_insn "vsx_xvcvsxwsp"
2515 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2516 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2517 UNSPEC_VSX_CVSXWSP))]
2518 "VECTOR_UNIT_VSX_P (V4SFmode)"
2520 [(set_attr "type" "vecfloat")])
2522 (define_insn "vsx_xvcvuxwsp"
2523 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2524 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2525 UNSPEC_VSX_CVUXWSP))]
2526 "VECTOR_UNIT_VSX_P (V4SFmode)"
2528 [(set_attr "type" "vecfloat")])
2530 ;; Generate float2_v2df
2531 ;; convert two vectors of double to a vector of float
2532 (define_expand "float2_v2df"
2533 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2534 (use (match_operand:V2DF 1 "register_operand" "wa"))
2535 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2536 "VECTOR_UNIT_VSX_P (V4SFmode)"
2538 rtx rtx_src1, rtx_src2, rtx_dst;
2540 rtx_dst = operands[0];
2541 rtx_src1 = operands[1];
2542 rtx_src2 = operands[2];
2544 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
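;; Illustrative sketch (not md syntax): float2_v2df conceptually narrows two
;; V2DF vectors into one V4SF result:
;;
;;   dst[0] = (float) a[0];  dst[1] = (float) a[1];
;;   dst[2] = (float) b[0];  dst[3] = (float) b[1];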
2549 ;; convert two long long signed ints to float
2550 (define_expand "float2_v2di"
2551 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2552 (use (match_operand:V2DI 1 "register_operand" "wa"))
2553 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2554 "VECTOR_UNIT_VSX_P (V4SFmode)"
2556 rtx rtx_src1, rtx_src2, rtx_dst;
2558 rtx_dst = operands[0];
2559 rtx_src1 = operands[1];
2560 rtx_src2 = operands[2];
2562 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2566 ;; Generate uns_float2
2567 ;; convert two long long unsigned ints to float
2568 (define_expand "uns_float2_v2di"
2569 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2570 (use (match_operand:V2DI 1 "register_operand" "wa"))
2571 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2572 "VECTOR_UNIT_VSX_P (V4SFmode)"
2574 rtx rtx_src1, rtx_src2, rtx_dst;
2576 rtx_dst = operands[0];
2577 rtx_src1 = operands[1];
2578 rtx_src2 = operands[2];
2580 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2585 ;; convert double or long long signed to float
2586 ;; (Only even words are valid, BE numbering)
2587 (define_expand "floate<mode>"
2588 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2589 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2590 "VECTOR_UNIT_VSX_P (V4SFmode)"
2592 if (VECTOR_ELT_ORDER_BIG)
2594 /* Shift left one word to put the even words in the correct location. */
2596 rtx rtx_val = GEN_INT (4);
2598 rtx_tmp = gen_reg_rtx (V4SFmode);
2599 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2600 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2601 rtx_tmp, rtx_tmp, rtx_val));
2604 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
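;; Illustrative sketch (not md syntax): floate converts each doubleword
;; element to float and leaves the results in the even words of the result
;; (BE numbering); the odd words are undefined.  floato below fills the odd
;; words instead.
;;
;;   dst[0] = (float) src[0];  dst[2] = (float) src[1];  /* even words */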
2609 ;; Generate uns_floate
2610 ;; convert long long unsigned to float
2611 ;; (Only even words are valid, BE numbering)
2612 (define_expand "unsfloatev2di"
2613 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2614 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2615 "VECTOR_UNIT_VSX_P (V4SFmode)"
2617 if (VECTOR_ELT_ORDER_BIG)
2619 /* Shift left one word to put the even words in the correct location. */
2621 rtx rtx_val = GEN_INT (4);
2623 rtx_tmp = gen_reg_rtx (V4SFmode);
2624 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2625 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2626 rtx_tmp, rtx_tmp, rtx_val));
2629 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2635 ;; convert double or long long signed to float
2636 ;; (Only odd words are valid, BE numbering)
2637 (define_expand "floato<mode>"
2638 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2639 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2640 "VECTOR_UNIT_VSX_P (V4SFmode)"
2642 if (VECTOR_ELT_ORDER_BIG)
2643 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2646 /* Shift left one word to put the odd words in the correct location. */
2648 rtx rtx_val = GEN_INT (4);
2650 rtx_tmp = gen_reg_rtx (V4SFmode);
2651 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2652 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2653 rtx_tmp, rtx_tmp, rtx_val));
2658 ;; Generate uns_floato
2659 ;; convert long long unsigned to float
2660 ;; (Only odd words are valid, BE numbering)
2661 (define_expand "unsfloatov2di"
2662 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2663 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2664 "VECTOR_UNIT_VSX_P (V4SFmode)"
2666 if (VECTOR_ELT_ORDER_BIG)
2667 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2670 /* Shift left one word to put the odd words in the correct location. */
2672 rtx rtx_val = GEN_INT (4);
2674 rtx_tmp = gen_reg_rtx (V4SFmode);
2675 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2676 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2677 rtx_tmp, rtx_tmp, rtx_val));
2682 ;; Generate vsigned2
2683 ;; convert two double float vectors to a vector of single precision ints
2684 (define_expand "vsigned2_v2df"
2685 [(match_operand:V4SI 0 "register_operand" "=wa")
2686 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2687 (match_operand:V2DF 2 "register_operand" "wa")]
2688 UNSPEC_VSX_VSIGNED2)]
2691 rtx rtx_src1, rtx_src2, rtx_dst;
2692 bool signed_convert = true;
2694 rtx_dst = operands[0];
2695 rtx_src1 = operands[1];
2696 rtx_src2 = operands[2];
2698 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2702 ;; Generate vsignedo_v2df
2703 ;; signed double float to int convert, odd words
2704 (define_expand "vsignedo_v2df"
2705 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2706 (match_operand:V2DF 1 "register_operand" "wa"))]
2709 if (VECTOR_ELT_ORDER_BIG)
2712 rtx rtx_val = GEN_INT (12);
2713 rtx_tmp = gen_reg_rtx (V4SImode);
2715 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2717 /* Big endian word numbering for words in operand is 0 1 2 3.
2718 Take (operand[1] operand[1]) and shift left three words (12 bytes):
2719 0 1 2 3 0 1 2 3 => 3 0 1 2
2720 Words 1 and 3 are now where they need to be for the result. */
2722 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2726 /* Little endian word numbering for operand is 3 2 1 0.
2727 Result words 3 and 1 are where they need to be. */
2728 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2732 [(set_attr "type" "veccomplex")])
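;; Worked example (illustrative) of the vsldoi rotate used above: a 12-byte
;; (three-word) shift takes bytes 12..27 of (tmp || tmp):
;;
;;   tmp = { w0, w1, w2, w3 }           /* w0 and w2 hold converted ints */
;;   out = { w3, w0, w1, w2 }           /* valid results in words 1 and 3 */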
2734 ;; Generate vsignede_v2df
2735 ;; signed double float to int convert, even words
2736 (define_expand "vsignede_v2df"
2737 [(set (match_operand:V4SI 0 "register_operand" "=v")
2738 (match_operand:V2DF 1 "register_operand" "v"))]
2741 if (VECTOR_ELT_ORDER_BIG)
2742 /* Big endian word numbering for words in operand is 0 1 2 3.
2743 Result words 0 and 2 are already where they need to be. */
2744 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2749 rtx rtx_val = GEN_INT (12);
2750 rtx_tmp = gen_reg_rtx (V4SImode);
2752 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2754 /* Little endian word numbering for operand is 3 2 1 0.
2755 take (operand[1] operand[1]) and shift left three words
2756 0 1 2 3 0 1 2 3 => 3 0 1 2
2757 Words 0 and 2 are now where they need to be for the result. */
2758 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2763 [(set_attr "type" "veccomplex")])
2765 ;; Generate vunsigned2
2766 ;; convert two double float vectors to a vector of single precision
2767 ;; unsigned ints
2768 (define_expand "vunsigned2_v2df"
2769 [(match_operand:V4SI 0 "register_operand" "=v")
2770 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2771 (match_operand:V2DF 2 "register_operand" "v")]
2772 UNSPEC_VSX_VSIGNED2)]
2775 rtx rtx_src1, rtx_src2, rtx_dst;
2776 bool signed_convert = false;
2778 rtx_dst = operands[0];
2779 rtx_src1 = operands[1];
2780 rtx_src2 = operands[2];
2782 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2786 ;; Generate vunsignedo_v2df
2787 ;; unsigned double float to int convert, odd words
2788 (define_expand "vunsignedo_v2df"
2789 [(set (match_operand:V4SI 0 "register_operand" "=v")
2790 (match_operand:V2DF 1 "register_operand" "v"))]
2793 if (VECTOR_ELT_ORDER_BIG)
2796 rtx rtx_val = GEN_INT (12);
2797 rtx_tmp = gen_reg_rtx (V4SImode);
2799 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2801 /* Big endian word numbering for words in operand is 0 1 2 3.
2802 Take (operand[1] operand[1]) and shift left three words (12 bytes):
2803 0 1 2 3 0 1 2 3 => 3 0 1 2
2804 Words 1 and 3 are now where they need to be for the result. */
2806 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2810 /* Little endian word numbering for operand is 3 2 1 0.
2811 Result words 3 and 1 are where they need to be. */
2812 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2816 [(set_attr "type" "veccomplex")])
2818 ;; Generate vunsignede_v2df
2819 ;; unsigned double float to int convert, even words
2820 (define_expand "vunsignede_v2df"
2821 [(set (match_operand:V4SI 0 "register_operand" "=v")
2822 (match_operand:V2DF 1 "register_operand" "v"))]
2825 if (VECTOR_ELT_ORDER_BIG)
2826 /* Big endian word numbering for words in operand is 0 1 2 3.
2827 Result words 0 and 2 are already where they need to be. */
2828 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2833 rtx rtx_val = GEN_INT (12);
2834 rtx_tmp = gen_reg_rtx (V4SImode);
2836 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2838 /* Little endian word numbering for operand is 3 2 1 0.
2839 take (operand[1] operand[1]) and shift left three words
2840 0 1 2 3 0 1 2 3 => 3 0 1 2
2841 Words 0 and 2 are now where they need to be for the result. */
2842 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2847 [(set_attr "type" "veccomplex")])
2849 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2850 ;; the xvrdpiz instruction does not truncate the value if the floating
2851 ;; point value is < LONG_MIN or > LONG_MAX.
2852 (define_insn "*vsx_float_fix_v2df2"
2853 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2856 (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2857 "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
2858 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2859 && !flag_trapping_math && TARGET_FRIZ"
2861 [(set_attr "type" "vecdouble")
2862 (set_attr "fp_type" "fp_addsub_d")])
2865 ;; Permute operations
2867 ;; Build a V2DF/V2DI vector from two scalars
2868 (define_insn "vsx_concat_<mode>"
2869 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2871 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2872 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2873 "VECTOR_MEM_VSX_P (<MODE>mode)"
2875 if (which_alternative == 0)
2876 return (BYTES_BIG_ENDIAN
2877 ? "xxpermdi %x0,%x1,%x2,0"
2878 : "xxpermdi %x0,%x2,%x1,0");
2880 else if (which_alternative == 1)
2881 return (BYTES_BIG_ENDIAN
2882 ? "mtvsrdd %x0,%1,%2"
2883 : "mtvsrdd %x0,%2,%1");
2888 [(set_attr "type" "vecperm")])
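;; Illustrative note: xxpermdi with immediate 0 takes the high doubleword of
;; each input, so building the vector {a, b} (element order) uses swapped
;; operands on little endian:
;;
;;   BE: xxpermdi dst,a,b,0             LE: xxpermdi dst,b,a,0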
2890 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2891 ;; word element in a vector register.
2892 (define_insn "*vsx_concat_<mode>_1"
2893 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2895 (vec_select:<VS_scalar>
2896 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2897 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2898 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2899 "VECTOR_MEM_VSX_P (<MODE>mode)"
2901 HOST_WIDE_INT dword = INTVAL (operands[2]);
2902 if (BYTES_BIG_ENDIAN)
2904 operands[4] = GEN_INT (2*dword);
2905 return "xxpermdi %x0,%x1,%x3,%4";
2909 operands[4] = GEN_INT (!dword);
2910 return "xxpermdi %x0,%x3,%x1,%4";
2913 [(set_attr "type" "vecperm")])
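;; Worked example (illustrative) for the immediate computed above: on big
;; endian with dword == 1, operands[4] = 2 (binary 10), so the emitted
;; "xxpermdi %x0,%x1,%x3,2" takes doubleword 1 of operand 1 for the high
;; half and doubleword 0 of operand 3 for the low half.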
2915 (define_insn "*vsx_concat_<mode>_2"
2916 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2918 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2919 (vec_select:<VS_scalar>
2920 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2921 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2922 "VECTOR_MEM_VSX_P (<MODE>mode)"
2924 HOST_WIDE_INT dword = INTVAL (operands[3]);
2925 if (BYTES_BIG_ENDIAN)
2927 operands[4] = GEN_INT (dword);
2928 return "xxpermdi %x0,%x1,%x2,%4";
2932 operands[4] = GEN_INT (2 * !dword);
2933 return "xxpermdi %x0,%x2,%x1,%4";
2936 [(set_attr "type" "vecperm")])
2938 (define_insn "*vsx_concat_<mode>_3"
2939 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2941 (vec_select:<VS_scalar>
2942 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2943 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2944 (vec_select:<VS_scalar>
2945 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2946 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2947 "VECTOR_MEM_VSX_P (<MODE>mode)"
2949 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2950 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2951 if (BYTES_BIG_ENDIAN)
2953 operands[5] = GEN_INT ((2 * dword1) + dword2);
2954 return "xxpermdi %x0,%x1,%x3,%5";
2958 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2959 return "xxpermdi %x0,%x3,%x1,%5";
2962 [(set_attr "type" "vecperm")])
2964 ;; Special purpose concat using xxpermdi to glue two single precision values
2965 ;; together, relying on the fact that internally scalar floats are represented
2966 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
2967 (define_insn "vsx_concat_v2sf"
2968 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2970 [(match_operand:SF 1 "vsx_register_operand" "ww")
2971 (match_operand:SF 2 "vsx_register_operand" "ww")]
2972 UNSPEC_VSX_CONCAT))]
2973 "VECTOR_MEM_VSX_P (V2DFmode)"
2975 if (BYTES_BIG_ENDIAN)
2976 return "xxpermdi %x0,%x1,%x2,0";
2978 return "xxpermdi %x0,%x2,%x1,0";
2980 [(set_attr "type" "vecperm")])
2982 ;; V4SImode initialization splitter
2983 (define_insn_and_split "vsx_init_v4si"
2984 [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2986 [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2987 (match_operand:SI 2 "reg_or_cint_operand" "rn")
2988 (match_operand:SI 3 "reg_or_cint_operand" "rn")
2989 (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2990 UNSPEC_VSX_VEC_INIT))
2991 (clobber (match_scratch:DI 5 "=&r"))
2992 (clobber (match_scratch:DI 6 "=&r"))]
2993 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2995 "&& reload_completed"
2998 rs6000_split_v4si_init (operands);
3002 ;; xxpermdi for little endian loads and stores. We need several of
3003 ;; these since the form of the PARALLEL differs by mode.
3004 (define_insn "*vsx_xxpermdi2_le_<mode>"
3005 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3007 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3008 (parallel [(const_int 1) (const_int 0)])))]
3009 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3010 "xxpermdi %x0,%x1,%x1,2"
3011 [(set_attr "type" "vecperm")])
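;; Illustrative note: "xxpermdi %x0,%x1,%x1,2" (DM = binary 10) selects
;; doubleword 1 of the source for the high half and doubleword 0 for the low
;; half, i.e. it swaps the two 64-bit halves:
;;
;;   dst = { src[1], src[0] }           /* doubleword view */
;;
;; The little endian load/store patterns below rely on the same swap.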
3013 (define_insn "*vsx_xxpermdi4_le_<mode>"
3014 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3016 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3017 (parallel [(const_int 2) (const_int 3)
3018 (const_int 0) (const_int 1)])))]
3019 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3020 "xxpermdi %x0,%x1,%x1,2"
3021 [(set_attr "type" "vecperm")])
3023 (define_insn "*vsx_xxpermdi8_le_V8HI"
3024 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3026 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3027 (parallel [(const_int 4) (const_int 5)
3028 (const_int 6) (const_int 7)
3029 (const_int 0) (const_int 1)
3030 (const_int 2) (const_int 3)])))]
3031 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
3032 "xxpermdi %x0,%x1,%x1,2"
3033 [(set_attr "type" "vecperm")])
3035 (define_insn "*vsx_xxpermdi16_le_V16QI"
3036 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3038 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3039 (parallel [(const_int 8) (const_int 9)
3040 (const_int 10) (const_int 11)
3041 (const_int 12) (const_int 13)
3042 (const_int 14) (const_int 15)
3043 (const_int 0) (const_int 1)
3044 (const_int 2) (const_int 3)
3045 (const_int 4) (const_int 5)
3046 (const_int 6) (const_int 7)])))]
3047 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3048 "xxpermdi %x0,%x1,%x1,2"
3049 [(set_attr "type" "vecperm")])
3051 ;; lxvd2x for little endian loads. We need several of
3052 ;; these since the form of the PARALLEL differs by mode.
3053 (define_insn "*vsx_lxvd2x2_le_<mode>"
3054 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3056 (match_operand:VSX_D 1 "memory_operand" "Z")
3057 (parallel [(const_int 1) (const_int 0)])))]
3058 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3060 [(set_attr "type" "vecload")])
3062 (define_insn "*vsx_lxvd2x4_le_<mode>"
3063 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3065 (match_operand:VSX_W 1 "memory_operand" "Z")
3066 (parallel [(const_int 2) (const_int 3)
3067 (const_int 0) (const_int 1)])))]
3068 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3070 [(set_attr "type" "vecload")])
3072 (define_insn "*vsx_lxvd2x8_le_V8HI"
3073 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3075 (match_operand:V8HI 1 "memory_operand" "Z")
3076 (parallel [(const_int 4) (const_int 5)
3077 (const_int 6) (const_int 7)
3078 (const_int 0) (const_int 1)
3079 (const_int 2) (const_int 3)])))]
3080 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3082 [(set_attr "type" "vecload")])
3084 (define_insn "*vsx_lxvd2x16_le_V16QI"
3085 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3087 (match_operand:V16QI 1 "memory_operand" "Z")
3088 (parallel [(const_int 8) (const_int 9)
3089 (const_int 10) (const_int 11)
3090 (const_int 12) (const_int 13)
3091 (const_int 14) (const_int 15)
3092 (const_int 0) (const_int 1)
3093 (const_int 2) (const_int 3)
3094 (const_int 4) (const_int 5)
3095 (const_int 6) (const_int 7)])))]
3096 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3098 [(set_attr "type" "vecload")])
3100 ;; stxvd2x for little endian stores. We need several of
3101 ;; these since the form of the PARALLEL differs by mode.
3102 (define_insn "*vsx_stxvd2x2_le_<mode>"
3103 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3105 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3106 (parallel [(const_int 1) (const_int 0)])))]
3107 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3109 [(set_attr "type" "vecstore")])
3111 (define_insn "*vsx_stxvd2x4_le_<mode>"
3112 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3114 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3115 (parallel [(const_int 2) (const_int 3)
3116 (const_int 0) (const_int 1)])))]
3117 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3119 [(set_attr "type" "vecstore")])
3121 (define_insn "*vsx_stxvd2x8_le_V8HI"
3122 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3124 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3125 (parallel [(const_int 4) (const_int 5)
3126 (const_int 6) (const_int 7)
3127 (const_int 0) (const_int 1)
3128 (const_int 2) (const_int 3)])))]
3129 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3131 [(set_attr "type" "vecstore")])
3133 (define_insn "*vsx_stxvd2x16_le_V16QI"
3134 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3136 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3137 (parallel [(const_int 8) (const_int 9)
3138 (const_int 10) (const_int 11)
3139 (const_int 12) (const_int 13)
3140 (const_int 14) (const_int 15)
3141 (const_int 0) (const_int 1)
3142 (const_int 2) (const_int 3)
3143 (const_int 4) (const_int 5)
3144 (const_int 6) (const_int 7)])))]
3145 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3147 [(set_attr "type" "vecstore")])
3149 ;; Convert a TImode value into V1TImode
3150 (define_expand "vsx_set_v1ti"
3151 [(match_operand:V1TI 0 "nonimmediate_operand")
3152 (match_operand:V1TI 1 "nonimmediate_operand")
3153 (match_operand:TI 2 "input_operand")
3154 (match_operand:QI 3 "u5bit_cint_operand")]
3155 "VECTOR_MEM_VSX_P (V1TImode)"
3157 if (operands[3] != const0_rtx)
3160 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3164 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3165 (define_expand "vsx_set_<mode>"
3166 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3167 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3168 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3169 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3170 "VECTOR_MEM_VSX_P (<MODE>mode)"
3172 rtx dest = operands[0];
3173 rtx vec_reg = operands[1];
3174 rtx value = operands[2];
3175 rtx ele = operands[3];
3176 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3178 if (ele == const0_rtx)
3180 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3181 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3184 else if (ele == const1_rtx)
3186 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3187 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3194 ;; Extract a DF/DI element from V2DF/V2DI
3195 ;; Optimize cases where we can do a simple or direct move,
3196 ;; or see if we can avoid doing the move at all.
3198 ;; There are some unresolved problems with reload that show up if an Altivec
3199 ;; register was picked. Limit the scalar value to FPRs for now.
3201 (define_insn "vsx_extract_<mode>"
3202 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3204 (vec_select:<VS_scalar>
3205 (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
3208 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3209 "VECTOR_MEM_VSX_P (<MODE>mode)"
3211 int element = INTVAL (operands[2]);
3212 int op0_regno = REGNO (operands[0]);
3213 int op1_regno = REGNO (operands[1]);
3216 gcc_assert (IN_RANGE (element, 0, 1));
3217 gcc_assert (VSX_REGNO_P (op1_regno));
3219 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3221 if (op0_regno == op1_regno)
3222 return ASM_COMMENT_START " vec_extract to same register";
3224 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3225 && TARGET_POWERPC64)
3226 return "mfvsrd %0,%x1";
3228 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3231 else if (VSX_REGNO_P (op0_regno))
3232 return "xxlor %x0,%x1,%x1";
3238 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3239 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3240 return "mfvsrld %0,%x1";
3242 else if (VSX_REGNO_P (op0_regno))
3244 fldDM = element << 1;
3245 if (!BYTES_BIG_ENDIAN)
3247 operands[3] = GEN_INT (fldDM);
3248 return "xxpermdi %x0,%x1,%x1,%3";
3254 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
3256 ;; Optimize extracting a single scalar element from memory.
3257 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3258 [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3259 (vec_select:<VSX_D:VS_scalar>
3260 (match_operand:VSX_D 1 "memory_operand" "m,m")
3261 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3262 (clobber (match_scratch:P 3 "=&b,&b"))]
3263 "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3265 "&& reload_completed"
3266 [(set (match_dup 0) (match_dup 4))]
3268 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3269 operands[3], <VSX_D:VS_scalar>mode);
3271 [(set_attr "type" "fpload,load")
3272 (set_attr "length" "8")])
3274 ;; Optimize storing a single scalar element that is in the right location
3275 ;; to be stored directly
3276 (define_insn "*vsx_extract_<mode>_store"
3277 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3278 (vec_select:<VS_scalar>
3279 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3280 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3281 "VECTOR_MEM_VSX_P (<MODE>mode)"
3286 [(set_attr "type" "fpstore")
3287 (set_attr "length" "4")])
3289 ;; Variable V2DI/V2DF extract shift
3290 (define_insn "vsx_vslo_<mode>"
3291 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3292 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3293 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3295 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3297 [(set_attr "type" "vecperm")])
3299 ;; Variable V2DI/V2DF extract
3300 (define_insn_and_split "vsx_extract_<mode>_var"
3301 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3302 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3303 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3304 UNSPEC_VSX_EXTRACT))
3305 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3306 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3307 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3309 "&& reload_completed"
3312 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3313 operands[3], operands[4]);
3317 ;; Extract a SF element from V4SF
3318 (define_insn_and_split "vsx_extract_v4sf"
3319 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3321 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3322 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3323 (clobber (match_scratch:V4SF 3 "=0"))]
3324 "VECTOR_UNIT_VSX_P (V4SFmode)"
3329 rtx op0 = operands[0];
3330 rtx op1 = operands[1];
3331 rtx op2 = operands[2];
3332 rtx op3 = operands[3];
3334 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3340 if (GET_CODE (op3) == SCRATCH)
3341 op3 = gen_reg_rtx (V4SFmode);
3342 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3345 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3348 [(set_attr "length" "8")
3349 (set_attr "type" "fp")])
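;; Illustrative sketch (not md syntax) of the extract above: rotate the
;; selected word into element 0 with xxsldwi, then convert that word to the
;; scalar (double-format) representation with xscvspdp.
;;
;;   tmp = xxsldwi (v, v, ele);         /* element `ele' -> word 0 */
;;   dst = tmp[0];                      /* xscvspdp_scalar2 */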
3351 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3352 [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3354 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3355 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3356 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3357 "VECTOR_MEM_VSX_P (V4SFmode)"
3359 "&& reload_completed"
3360 [(set (match_dup 0) (match_dup 4))]
3362 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3363 operands[3], SFmode);
3365 [(set_attr "type" "fpload,fpload,fpload,load")
3366 (set_attr "length" "8")])
3368 ;; Variable V4SF extract
3369 (define_insn_and_split "vsx_extract_v4sf_var"
3370 [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3371 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3372 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3373 UNSPEC_VSX_EXTRACT))
3374 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3375 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3376 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3378 "&& reload_completed"
3381 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3382 operands[3], operands[4]);
3386 ;; Expand the builtin form of xxpermdi to canonical rtl.
3387 (define_expand "vsx_xxpermdi_<mode>"
3388 [(match_operand:VSX_L 0 "vsx_register_operand")
3389 (match_operand:VSX_L 1 "vsx_register_operand")
3390 (match_operand:VSX_L 2 "vsx_register_operand")
3391 (match_operand:QI 3 "u5bit_cint_operand")]
3392 "VECTOR_MEM_VSX_P (<MODE>mode)"
3394 rtx target = operands[0];
3395 rtx op0 = operands[1];
3396 rtx op1 = operands[2];
3397 int mask = INTVAL (operands[3]);
3398 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3399 rtx perm1 = GEN_INT ((mask & 1) + 2);
3400 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3402 if (<MODE>mode == V2DFmode)
3403 gen = gen_vsx_xxpermdi2_v2df_1;
3406 gen = gen_vsx_xxpermdi2_v2di_1;
3407 if (<MODE>mode != V2DImode)
3409 target = gen_lowpart (V2DImode, target);
3410 op0 = gen_lowpart (V2DImode, op0);
3411 op1 = gen_lowpart (V2DImode, op1);
3414 emit_insn (gen (target, op0, op1, perm0, perm1));
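;; Worked example (illustrative) for the mask decoding above: mask == 2
;; gives perm0 == 1 and perm1 == 2, so the canonical rtl selects
;;
;;   dst = { op0[1], op1[0] }           /* doubleword elements, BE order */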
3418 ;; Special version of xxpermdi that retains big-endian semantics.
3419 (define_expand "vsx_xxpermdi_<mode>_be"
3420 [(match_operand:VSX_L 0 "vsx_register_operand")
3421 (match_operand:VSX_L 1 "vsx_register_operand")
3422 (match_operand:VSX_L 2 "vsx_register_operand")
3423 (match_operand:QI 3 "u5bit_cint_operand")]
3424 "VECTOR_MEM_VSX_P (<MODE>mode)"
3426 rtx target = operands[0];
3427 rtx op0 = operands[1];
3428 rtx op1 = operands[2];
3429 int mask = INTVAL (operands[3]);
3430 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3431 rtx perm1 = GEN_INT ((mask & 1) + 2);
3432 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3434 if (<MODE>mode == V2DFmode)
3435 gen = gen_vsx_xxpermdi2_v2df_1;
3438 gen = gen_vsx_xxpermdi2_v2di_1;
3439 if (<MODE>mode != V2DImode)
3441 target = gen_lowpart (V2DImode, target);
3442 op0 = gen_lowpart (V2DImode, op0);
3443 op1 = gen_lowpart (V2DImode, op1);
3446 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3447 transformation we don't want; it is necessary for
3448 rs6000_expand_vec_perm_const_1 but not for this use. So we
3449 prepare for that by reversing the transformation here. */
3450 if (BYTES_BIG_ENDIAN)
3451 emit_insn (gen (target, op0, op1, perm0, perm1));
3454 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3455 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3456 emit_insn (gen (target, op1, op0, p0, p1));
3461 (define_insn "vsx_xxpermdi2_<mode>_1"
3462 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3464 (vec_concat:<VS_double>
3465 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3466 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3467 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3468 (match_operand 4 "const_2_to_3_operand" "")])))]
3469 "VECTOR_MEM_VSX_P (<MODE>mode)"
3473 /* For little endian, swap operands and invert/swap selectors
3474 to get the correct xxpermdi. The operand swap sets up the
3475 inputs as a little endian array. The selectors are swapped
3476 because they are defined to use big endian ordering. The
3477 selectors are inverted to get the correct doublewords for
3478 little endian ordering. */
3479 if (BYTES_BIG_ENDIAN)
3481 op3 = INTVAL (operands[3]);
3482 op4 = INTVAL (operands[4]);
3486 op3 = 3 - INTVAL (operands[4]);
3487 op4 = 3 - INTVAL (operands[3]);
3490 mask = (op3 << 1) | (op4 - 2);
3491 operands[3] = GEN_INT (mask);
3493 if (BYTES_BIG_ENDIAN)
3494 return "xxpermdi %x0,%x1,%x2,%3";
3496 return "xxpermdi %x0,%x2,%x1,%3";
3498 [(set_attr "type" "vecperm")])
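;; Worked example (illustrative) for the immediate encoding above: on big
;; endian, selectors 1 and 2 give mask = (1 << 1) | (2 - 2) = 2, so
;; "xxpermdi %x0,%x1,%x2,2" takes doubleword 1 of operand 1 and doubleword 0
;; of operand 2.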
3500 ;; Extraction of a single element from a small integer vector. Until ISA 3.0,
3501 ;; none of the small types were allowed in a vector register, so we had to
3502 ;; extract to DImode and either do a direct move or store.
3503 (define_expand "vsx_extract_<mode>"
3504 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3505 (vec_select:<VS_scalar>
3506 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3507 (parallel [(match_operand:QI 2 "const_int_operand")])))
3508 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3509 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3511 /* If we have ISA 3.0, we can use xxextractuw/vextractu{b,h}. */
3512 if (TARGET_P9_VECTOR)
3514 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3520 (define_insn "vsx_extract_<mode>_p9"
3521 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3522 (vec_select:<VS_scalar>
3523 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3524 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3525 (clobber (match_scratch:SI 3 "=r,X"))]
3526 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3528 if (which_alternative == 0)
3533 HOST_WIDE_INT elt = INTVAL (operands[2]);
3534 HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
3535 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3538 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3539 HOST_WIDE_INT offset = unit_size * elt_adj;
3541 operands[2] = GEN_INT (offset);
3543 return "xxextractuw %x0,%x1,%2";
3545 return "vextractu<wd> %0,%1,%2";
3548 [(set_attr "type" "vecsimple")])
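;; Worked example (illustrative) for the offset computed above: extracting
;; element 3 of a V8HI on little endian gives elt_adj = 8 - 1 - 3 = 4, so
;; the byte offset for vextractuh is 2 * 4 = 8.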
3551 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3552 (vec_select:<VS_scalar>
3553 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3554 (parallel [(match_operand:QI 2 "const_int_operand")])))
3555 (clobber (match_operand:SI 3 "int_reg_operand"))]
3556 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3559 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3560 rtx op1 = operands[1];
3561 rtx op2 = operands[2];
3562 rtx op3 = operands[3];
3563 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3565 emit_move_insn (op3, GEN_INT (offset));
3566 if (VECTOR_ELT_ORDER_BIG)
3567 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3569 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3573 ;; Optimize zero extracts to eliminate the AND after the extract.
3574 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3575 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3577 (vec_select:<VS_scalar>
3578 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3579 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3580 (clobber (match_scratch:SI 3 "=r,X"))]
3581 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3583 "&& reload_completed"
3584 [(parallel [(set (match_dup 4)
3585 (vec_select:<VS_scalar>
3587 (parallel [(match_dup 2)])))
3588 (clobber (match_dup 3))])]
3590 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3593 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3594 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3595 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3596 (vec_select:<VS_scalar>
3597 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3598 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3599 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3600 (clobber (match_scratch:SI 4 "=X,&r"))]
3601 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3603 "&& reload_completed"
3604 [(parallel [(set (match_dup 3)
3605 (vec_select:<VS_scalar>
3607 (parallel [(match_dup 2)])))
3608 (clobber (match_dup 4))])
3612 (define_insn_and_split "*vsx_extract_si"
3613 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3615 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3616 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3617 (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3618 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3620 "&& reload_completed"
3623 rtx dest = operands[0];
3624 rtx src = operands[1];
3625 rtx element = operands[2];
3626 rtx vec_tmp = operands[3];
3629 if (!VECTOR_ELT_ORDER_BIG)
3630 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3632 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3633 instruction. */
3634 value = INTVAL (element);
3636 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3640 if (MEM_P (operands[0]))
3642 if (can_create_pseudo_p ())
3643 dest = rs6000_address_for_fpconvert (dest);
3645 if (TARGET_P8_VECTOR)
3646 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3648 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3651 else if (TARGET_P8_VECTOR)
3652 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3654 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3655 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3659 [(set_attr "type" "mftgpr,vecperm,fpstore")
3660 (set_attr "length" "8")])
3662 (define_insn_and_split "*vsx_extract_<mode>_p8"
3663 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3664 (vec_select:<VS_scalar>
3665 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3666 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3667 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3668 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3669 && !TARGET_P9_VECTOR"
3671 "&& reload_completed"
3674 rtx dest = operands[0];
3675 rtx src = operands[1];
3676 rtx element = operands[2];
3677 rtx vec_tmp = operands[3];
3680 if (!VECTOR_ELT_ORDER_BIG)
3681 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3683 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3684 instruction. */
3685 value = INTVAL (element);
3686 if (<MODE>mode == V16QImode)
3689 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3693 else if (<MODE>mode == V8HImode)
3696 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3703 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3704 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3707 [(set_attr "type" "mftgpr")])
3709 ;; Optimize extracting a single scalar element from memory.
3710 (define_insn_and_split "*vsx_extract_<mode>_load"
3711 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3712 (vec_select:<VS_scalar>
3713 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3714 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3715 (clobber (match_scratch:DI 3 "=&b"))]
3716 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3718 "&& reload_completed"
3719 [(set (match_dup 0) (match_dup 4))]
3721 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3722 operands[3], <VS_scalar>mode);
3724 [(set_attr "type" "load")
3725 (set_attr "length" "8")])
3727 ;; Variable V16QI/V8HI/V4SI extract
3728 (define_insn_and_split "vsx_extract_<mode>_var"
3729 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3731 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3732 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3733 UNSPEC_VSX_EXTRACT))
3734 (clobber (match_scratch:DI 3 "=r,r,&b"))
3735 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3736 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3738 "&& reload_completed"
3741 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3742 operands[3], operands[4]);
3746 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
3747 [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3749 (unspec:<VSX_EXTRACT_I:VS_scalar>
3750 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3751 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3752 UNSPEC_VSX_EXTRACT)))
3753 (clobber (match_scratch:DI 3 "=r,r,&b"))
3754 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3755 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3757 "&& reload_completed"
3760 machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
3761 rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3762 operands[1], operands[2],
3763 operands[3], operands[4]);
3767 ;; VSX_EXTRACT optimizations
3768 ;; Optimize double d = (double) vec_extract (vi, <n>)
3769 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
3770 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3771 [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3774 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3775 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3776 (clobber (match_scratch:V4SI 3 "=v"))]
3777 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3782 rtx dest = operands[0];
3783 rtx src = operands[1];
3784 rtx element = operands[2];
3785 rtx v4si_tmp = operands[3];
3788 if (!VECTOR_ELT_ORDER_BIG)
3789 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3791   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
3793 value = INTVAL (element);
3796 if (GET_CODE (v4si_tmp) == SCRATCH)
3797 v4si_tmp = gen_reg_rtx (V4SImode);
3798 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3803 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
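;; A hedged C sketch of the optimization described above (assuming
;; <altivec.h>); the splat moves the element into the top position and the
;; XVCVSXWDP/XVCVUXWDP conversion does the rest.
;;
;;	#include <altivec.h>
;;	double to_double (vector int v)
;;	{
;;	  return (double) vec_extract (v, 1);
;;	}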
3807 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3808 ;; where <type> is a floating point type supported by the hardware that is
3809 ;; not double. First convert the value to double, and then to the desired
3811 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3812 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3813 (any_float:VSX_EXTRACT_FL
3815 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3816 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3817 (clobber (match_scratch:V4SI 3 "=v"))
3818 (clobber (match_scratch:DF 4 "=ws"))]
3819 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3824 rtx dest = operands[0];
3825 rtx src = operands[1];
3826 rtx element = operands[2];
3827 rtx v4si_tmp = operands[3];
3828 rtx df_tmp = operands[4];
3831 if (!VECTOR_ELT_ORDER_BIG)
3832 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3834   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
3836 value = INTVAL (element);
3839 if (GET_CODE (v4si_tmp) == SCRATCH)
3840 v4si_tmp = gen_reg_rtx (V4SImode);
3841 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3846 if (GET_CODE (df_tmp) == SCRATCH)
3847 df_tmp = gen_reg_rtx (DFmode);
3849 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3851 if (<MODE>mode == SFmode)
3852 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3853 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3854 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3855 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3856 && TARGET_FLOAT128_HW)
3857 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3858 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3859 emit_insn (gen_extenddfif2 (dest, df_tmp));
3860 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3861 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
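;; A hedged C sketch of the non-double case handled here (assuming
;; <altivec.h>): the value is converted to double first and then truncated
;; or extended to the requested type.
;;
;;	#include <altivec.h>
;;	float to_float (vector unsigned int v)
;;	{
;;	  return (float) vec_extract (v, 2);	/* via DFmode, then frsp */
;;	}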
3868 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3869 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3870 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3871 ;; vector short or vector unsigned short.
3872 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3873 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3875 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3876 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3877 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3878 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3879 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3880 && TARGET_P9_VECTOR"
3882 "&& reload_completed"
3883 [(parallel [(set (match_dup 3)
3884 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3886 (parallel [(match_dup 2)])))
3887 (clobber (scratch:SI))])
3889 (sign_extend:DI (match_dup 3)))
3891 (float:<FL_CONV:MODE> (match_dup 4)))]
3893 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3896 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3897 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3898 (unsigned_float:FL_CONV
3899 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3900 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3901 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3902 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3903 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3904 && TARGET_P9_VECTOR"
3906 "&& reload_completed"
3907 [(parallel [(set (match_dup 3)
3908 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3910 (parallel [(match_dup 2)])))
3911 (clobber (scratch:SI))])
3913 (float:<FL_CONV:MODE> (match_dup 4)))]
3915 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3918 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3919 (define_insn "vsx_set_<mode>_p9"
3920 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3921 (unspec:VSX_EXTRACT_I
3922 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3923 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3924 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3926 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3928 int ele = INTVAL (operands[3]);
3929 int nunits = GET_MODE_NUNITS (<MODE>mode);
3931 if (!VECTOR_ELT_ORDER_BIG)
3932 ele = nunits - 1 - ele;
3934 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3935 if (<MODE>mode == V4SImode)
3936 return "xxinsertw %x0,%x2,%3";
3938 return "vinsert<wd> %0,%2,%3";
3940 [(set_attr "type" "vecperm")])
3942 (define_insn_and_split "vsx_set_v4sf_p9"
3943 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3945 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3946 (match_operand:SF 2 "gpc_reg_operand" "ww")
3947 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3949 (clobber (match_scratch:SI 4 "=&wJwK"))]
3950 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3952 "&& reload_completed"
3954 (unspec:V4SF [(match_dup 2)]
3955 UNSPEC_VSX_CVDPSPN))
3956 (parallel [(set (match_dup 4)
3957 (vec_select:SI (match_dup 6)
3958 (parallel [(match_dup 7)])))
3959 (clobber (scratch:SI))])
3961 (unspec:V4SI [(match_dup 8)
3966 unsigned int tmp_regno = reg_or_subregno (operands[4]);
3968 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3969 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3970 operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
3971 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3973 [(set_attr "type" "vecperm")
3974 (set_attr "length" "12")])
3976 ;; Special case setting 0.0f to a V4SF element
3977 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3978 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3980 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3981 (match_operand:SF 2 "zero_fp_constant" "j")
3982 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3984 (clobber (match_scratch:SI 4 "=&wJwK"))]
3985 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3987 "&& reload_completed"
3991 (unspec:V4SI [(match_dup 5)
3996 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3998 [(set_attr "type" "vecperm")
3999 (set_attr "length" "8")])
4001 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4002 ;; that is in the default scalar position (1 for big endian, 2 for little
4003 ;; endian). We just need to do an xxinsertw since the element is in the
4004 ;; correct location.
4006 (define_insn "*vsx_insert_extract_v4sf_p9"
4007 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4009 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4010 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4012 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4013 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4015 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4016 && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4018 int ele = INTVAL (operands[4]);
4020 if (!VECTOR_ELT_ORDER_BIG)
4021 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4023 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4024 return "xxinsertw %x0,%x2,%4";
4026 [(set_attr "type" "vecperm")])
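;; An illustrative C form of the insert/extract combination matched above
;; (assuming <altivec.h>); on little endian, element 2 of the source is
;; already in the scalar slot, so only an xxinsertw is needed.
;;
;;	#include <altivec.h>
;;	vector float move_elem (vector float dst, vector float src)
;;	{
;;	  return vec_insert (vec_extract (src, 2), dst, 0);
;;	}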
4028 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4029 ;; that is in the default scalar position (1 for big endian, 2 for little
4030 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4032 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4033 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4035 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4036 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4038 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4039 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4041 (clobber (match_scratch:SI 5 "=&wJwK"))]
4042 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4043 && TARGET_P9_VECTOR && TARGET_POWERPC64
4044 && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4047 [(parallel [(set (match_dup 5)
4048 (vec_select:SI (match_dup 6)
4049 (parallel [(match_dup 3)])))
4050 (clobber (scratch:SI))])
4052 (unspec:V4SI [(match_dup 8)
4057 if (GET_CODE (operands[5]) == SCRATCH)
4058 operands[5] = gen_reg_rtx (SImode);
4060 operands[6] = gen_lowpart (V4SImode, operands[2]);
4061 operands[7] = gen_lowpart (V4SImode, operands[0]);
4062 operands[8] = gen_lowpart (V4SImode, operands[1]);
4064 [(set_attr "type" "vecperm")])
4066 ;; Expanders for builtins
4067 (define_expand "vsx_mergel_<mode>"
4068 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4069 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4070 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4071 "VECTOR_MEM_VSX_P (<MODE>mode)"
4076 /* Special handling for LE with -maltivec=be. */
4077 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4079 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4080 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4084 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4085 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4088 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4089 emit_insn (gen_rtx_SET (operands[0], x));
4093 (define_expand "vsx_mergeh_<mode>"
4094 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4095 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4096 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4097 "VECTOR_MEM_VSX_P (<MODE>mode)"
4102 /* Special handling for LE with -maltivec=be. */
4103 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4105 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4106 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4110 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4111 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4114 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4115 emit_insn (gen_rtx_SET (operands[0], x));
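;; These expanders back the merge built-ins; a minimal C sketch (assuming
;; <altivec.h> and VSX):
;;
;;	#include <altivec.h>
;;	vector double hi_pair (vector double a, vector double b)
;;	{
;;	  return vec_mergeh (a, b);	/* { a[0], b[0] } */
;;	}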
4120 ;; We separate the register splat insn from the memory splat insn to force the
4121 ;; register allocator to generate the indexed form of the SPLAT when it is
4122 ;; given an offsettable memory reference. Otherwise, if the register and
4123 ;; memory insns were combined into a single insn, the register allocator would
4124 ;; load the value into a register, and then do a double word permute.
4125 (define_expand "vsx_splat_<mode>"
4126 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4127 (vec_duplicate:VSX_D
4128 (match_operand:<VS_scalar> 1 "input_operand")))]
4129 "VECTOR_MEM_VSX_P (<MODE>mode)"
4131 rtx op1 = operands[1];
4133 operands[1] = rs6000_address_for_fpconvert (op1);
4134 else if (!REG_P (op1))
4135     operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
4138 (define_insn "vsx_splat_<mode>_reg"
4139 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4140 (vec_duplicate:VSX_D
4141 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4142 "VECTOR_MEM_VSX_P (<MODE>mode)"
4144 xxpermdi %x0,%x1,%x1,0
4146 [(set_attr "type" "vecperm")])
4148 (define_insn "vsx_splat_<VSX_D:mode>_mem"
4149 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4150 (vec_duplicate:VSX_D
4151 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4152 "VECTOR_MEM_VSX_P (<MODE>mode)"
4154 [(set_attr "type" "vecload")])
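;; A hedged C example of splatting a 64-bit scalar (assuming <altivec.h>);
;; splatting directly from memory lets the indexed-form load-and-splat be
;; used, as described above.
;;
;;	#include <altivec.h>
;;	vector double dup (double x)      { return vec_splats (x); }
;;	vector double dup_mem (double *p) { return vec_splats (*p); }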
4156 ;; V4SI splat support
4157 (define_insn "vsx_splat_v4si"
4158 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4160 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4165 [(set_attr "type" "vecperm,vecload")])
4167 ;; SImode is not currently allowed in vector registers. This pattern
4168 ;; allows us to use direct move to get the value in a vector register
4169 ;; so that we can use XXSPLTW
4170 (define_insn "vsx_splat_v4si_di"
4171 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4174 (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4175 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4179 [(set_attr "type" "vecperm")])
4181 ;; V4SF splat (ISA 3.0)
4182 (define_insn_and_split "vsx_splat_v4sf"
4183 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4185 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4191 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4193 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4195 (unspec:V4SF [(match_dup 0)
4196 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4198 [(set_attr "type" "vecload,vecperm,mftgpr")
4199 (set_attr "length" "4,8,4")])
4201 ;; V4SF/V4SI splat from a vector element
4202 (define_insn "vsx_xxspltw_<mode>"
4203 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4204 (vec_duplicate:VSX_W
4205 (vec_select:<VS_scalar>
4206 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4208 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4209 "VECTOR_MEM_VSX_P (<MODE>mode)"
4211 if (!BYTES_BIG_ENDIAN)
4212 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4214 return "xxspltw %x0,%x1,%2";
4216 [(set_attr "type" "vecperm")])
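;; A minimal C sketch of splatting from a vector element (assuming
;; <altivec.h>):
;;
;;	#include <altivec.h>
;;	vector int dup1 (vector int v)
;;	{
;;	  return vec_splat (v, 1);	/* xxspltw */
;;	}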
4218 (define_insn "vsx_xxspltw_<mode>_direct"
4219 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4220 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4221 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4222 UNSPEC_VSX_XXSPLTW))]
4223 "VECTOR_MEM_VSX_P (<MODE>mode)"
4224 "xxspltw %x0,%x1,%2"
4225 [(set_attr "type" "vecperm")])
4227 ;; V16QI/V8HI splat support on ISA 2.07
4228 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4229 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4230 (vec_duplicate:VSX_SPLAT_I
4231 (truncate:<VS_scalar>
4232 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4233 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4234 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4235 [(set_attr "type" "vecperm")])
4237 ;; V2DF/V2DI splat for use by vec_splat builtin
4238 (define_insn "vsx_xxspltd_<mode>"
4239 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4240 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4241 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4242 UNSPEC_VSX_XXSPLTD))]
4243 "VECTOR_MEM_VSX_P (<MODE>mode)"
4245 if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
4246 || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
4247 return "xxpermdi %x0,%x1,%x1,0";
4249 return "xxpermdi %x0,%x1,%x1,3";
4251 [(set_attr "type" "vecperm")])
4253 ;; V4SF/V4SI interleave
4254 (define_insn "vsx_xxmrghw_<mode>"
4255 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4257 (vec_concat:<VS_double>
4258 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4259 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4260 (parallel [(const_int 0) (const_int 4)
4261 (const_int 1) (const_int 5)])))]
4262 "VECTOR_MEM_VSX_P (<MODE>mode)"
4264 if (BYTES_BIG_ENDIAN)
4265 return "xxmrghw %x0,%x1,%x2";
4267 return "xxmrglw %x0,%x2,%x1";
4269 [(set_attr "type" "vecperm")])
4271 (define_insn "vsx_xxmrglw_<mode>"
4272 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4274 (vec_concat:<VS_double>
4275 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4276 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4277 (parallel [(const_int 2) (const_int 6)
4278 (const_int 3) (const_int 7)])))]
4279 "VECTOR_MEM_VSX_P (<MODE>mode)"
4281 if (BYTES_BIG_ENDIAN)
4282 return "xxmrglw %x0,%x1,%x2";
4284 return "xxmrghw %x0,%x2,%x1";
4286 [(set_attr "type" "vecperm")])
4288 ;; Shift left double by word immediate
4289 (define_insn "vsx_xxsldwi_<mode>"
4290 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4291 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4292 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4293 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4295 "VECTOR_MEM_VSX_P (<MODE>mode)"
4296 "xxsldwi %x0,%x1,%x2,%3"
4297 [(set_attr "type" "vecperm")])
4300 ;; Vector reduction insns and splitters
4302 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4303 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4307 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4308 (parallel [(const_int 1)]))
4311 (parallel [(const_int 0)])))
4313 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4314 "VECTOR_UNIT_VSX_P (V2DFmode)"
4319 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4320 ? gen_reg_rtx (V2DFmode)
4322 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4323 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4326 [(set_attr "length" "8")
4327 (set_attr "type" "veccomplex")])
4329 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4330 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4332 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4333 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4334 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4335 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4336 "VECTOR_UNIT_VSX_P (V4SFmode)"
4341 rtx op0 = operands[0];
4342 rtx op1 = operands[1];
4343 rtx tmp2, tmp3, tmp4;
4345 if (can_create_pseudo_p ())
4347 tmp2 = gen_reg_rtx (V4SFmode);
4348 tmp3 = gen_reg_rtx (V4SFmode);
4349 tmp4 = gen_reg_rtx (V4SFmode);
4358 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4359 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4360 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4361 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4364 [(set_attr "length" "16")
4365 (set_attr "type" "veccomplex")])
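;; These reduction patterns are used by the vectorizer; a hedged C sketch of
;; code that can end up here (assuming -O3 and -ffast-math so floating-point
;; reassociation is permitted):
;;
;;	float sum (const float *a)
;;	{
;;	  float s = 0.0f;
;;	  for (int i = 0; i < 1024; i++)
;;	    s += a[i];
;;	  return s;
;;	}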
4367 ;; Combiner patterns for the vector reduction patterns that know we can get
4368 ;; to the top element of the V2DF array without doing an extract.
4370 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4371 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4376 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4377 (parallel [(const_int 1)]))
4380 (parallel [(const_int 0)])))
4382 (parallel [(const_int 1)])))
4383 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4384 "VECTOR_UNIT_VSX_P (V2DFmode)"
4389 rtx hi = gen_highpart (DFmode, operands[1]);
4390 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4391 ? gen_reg_rtx (DFmode)
4394 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4395 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4398 [(set_attr "length" "8")
4399 (set_attr "type" "veccomplex")])
4401 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4402 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4405 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4406 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4407 (parallel [(const_int 3)])))
4408 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4409 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4410 (clobber (match_scratch:V4SF 4 "=0,0"))]
4411 "VECTOR_UNIT_VSX_P (V4SFmode)"
4416 rtx op0 = operands[0];
4417 rtx op1 = operands[1];
4418 rtx tmp2, tmp3, tmp4, tmp5;
4420 if (can_create_pseudo_p ())
4422 tmp2 = gen_reg_rtx (V4SFmode);
4423 tmp3 = gen_reg_rtx (V4SFmode);
4424 tmp4 = gen_reg_rtx (V4SFmode);
4425 tmp5 = gen_reg_rtx (V4SFmode);
4435 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4436 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4437 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4438 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4439 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4442 [(set_attr "length" "20")
4443 (set_attr "type" "veccomplex")])
4446 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4448 [(set (match_operand:P 0 "base_reg_operand")
4449 (match_operand:P 1 "short_cint_operand"))
4450 (set (match_operand:VSX_M 2 "vsx_register_operand")
4451 (mem:VSX_M (plus:P (match_dup 0)
4452 (match_operand:P 3 "int_reg_operand"))))]
4453 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4454 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4455 [(set_attr "length" "8")
4456 (set_attr "type" "vecload")])
4459 [(set (match_operand:P 0 "base_reg_operand")
4460 (match_operand:P 1 "short_cint_operand"))
4461 (set (match_operand:VSX_M 2 "vsx_register_operand")
4462 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4464 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4465 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4466 [(set_attr "length" "8")
4467 (set_attr "type" "vecload")])
4470 ;; ISA 3.0 vector extend sign support
4472 (define_insn "vsx_sign_extend_qi_<mode>"
4473 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4475 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4476 UNSPEC_VSX_SIGN_EXTEND))]
4479 [(set_attr "type" "vecexts")])
4481 (define_insn "vsx_sign_extend_hi_<mode>"
4482 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4484 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4485 UNSPEC_VSX_SIGN_EXTEND))]
4488 [(set_attr "type" "vecexts")])
4490 (define_insn "*vsx_sign_extend_si_v2di"
4491 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4492 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4493 UNSPEC_VSX_SIGN_EXTEND))]
4496 [(set_attr "type" "vecexts")])
4499 ;; ISA 3.0 Binary Floating-Point Support
4501 ;; VSX Scalar Extract Exponent Quad-Precision
4502 (define_insn "xsxexpqp_<mode>"
4503 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4504 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4505 UNSPEC_VSX_SXEXPDP))]
4508 [(set_attr "type" "vecmove")])
4510 ;; VSX Scalar Extract Exponent Double-Precision
4511 (define_insn "xsxexpdp"
4512 [(set (match_operand:DI 0 "register_operand" "=r")
4513 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4514 UNSPEC_VSX_SXEXPDP))]
4515 "TARGET_P9_VECTOR && TARGET_64BIT"
4517 [(set_attr "type" "integer")])
4519 ;; VSX Scalar Extract Significand Quad-Precision
4520 (define_insn "xsxsigqp_<mode>"
4521 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4522 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4526 [(set_attr "type" "vecmove")])
4528 ;; VSX Scalar Extract Significand Double-Precision
4529 (define_insn "xsxsigdp"
4530 [(set (match_operand:DI 0 "register_operand" "=r")
4531 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4533 "TARGET_P9_VECTOR && TARGET_64BIT"
4535 [(set_attr "type" "integer")])
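;; Hedged C examples of the built-ins these extract patterns implement
;; (assuming -mcpu=power9 -m64; see the GCC manual entries for
;; scalar_extract_exp and scalar_extract_sig — no header is required):
;;
;;	unsigned int exp_bits (double x)
;;	{ return scalar_extract_exp (x); }	/* xsxexpdp */
;;	unsigned long long sig_bits (double x)
;;	{ return scalar_extract_sig (x); }	/* xsxsigdp */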
4537 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4538 (define_insn "xsiexpqpf_<mode>"
4539 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4541 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4542 (match_operand:DI 2 "altivec_register_operand" "v")]
4543 UNSPEC_VSX_SIEXPQP))]
4546 [(set_attr "type" "vecmove")])
4548 ;; VSX Scalar Insert Exponent Quad-Precision
4549 (define_insn "xsiexpqp_<mode>"
4550 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4551 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4552 (match_operand:DI 2 "altivec_register_operand" "v")]
4553 UNSPEC_VSX_SIEXPQP))]
4556 [(set_attr "type" "vecmove")])
4558 ;; VSX Scalar Insert Exponent Double-Precision
4559 (define_insn "xsiexpdp"
4560 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4561 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4562 (match_operand:DI 2 "register_operand" "r")]
4563 UNSPEC_VSX_SIEXPDP))]
4564 "TARGET_P9_VECTOR && TARGET_64BIT"
4565 "xsiexpdp %x0,%1,%2"
4566 [(set_attr "type" "fpsimple")])
4568 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4569 (define_insn "xsiexpdpf"
4570 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4571 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4572 (match_operand:DI 2 "register_operand" "r")]
4573 UNSPEC_VSX_SIEXPDP))]
4574 "TARGET_P9_VECTOR && TARGET_64BIT"
4575 "xsiexpdp %x0,%1,%2"
4576 [(set_attr "type" "fpsimple")])
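;; A hedged C example of the insert-exponent built-in (assuming
;; -mcpu=power9 -m64; see the GCC manual for scalar_insert_exp):
;;
;;	double with_exp (unsigned long long sig, unsigned long long exp)
;;	{
;;	  return scalar_insert_exp (sig, exp);	/* xsiexpdp */
;;	}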
4578 ;; VSX Scalar Compare Exponents Double-Precision
4579 (define_expand "xscmpexpdp_<code>"
4583 [(match_operand:DF 1 "vsx_register_operand" "wa")
4584 (match_operand:DF 2 "vsx_register_operand" "wa")]
4585 UNSPEC_VSX_SCMPEXPDP)
4587 (set (match_operand:SI 0 "register_operand" "=r")
4588 (CMP_TEST:SI (match_dup 3)
4592 operands[3] = gen_reg_rtx (CCFPmode);
4595 (define_insn "*xscmpexpdp"
4596 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4598 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4599 (match_operand:DF 2 "vsx_register_operand" "wa")]
4600 UNSPEC_VSX_SCMPEXPDP)
4601 (match_operand:SI 3 "zero_constant" "j")))]
4603 "xscmpexpdp %0,%x1,%x2"
4604 [(set_attr "type" "fpcompare")])
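;; A hedged C example of the exponent-compare built-ins this expands
;; (assuming -mcpu=power9; see the GCC manual for scalar_cmp_exp_gt and
;; friends):
;;
;;	int exp_gt (double a, double b)
;;	{
;;	  return scalar_cmp_exp_gt (a, b);	/* xscmpexpdp */
;;	}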
4606 ;; VSX Scalar Test Data Class Quad-Precision
4607 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4608 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4609 ;; setting the eq bit if any of the conditions tested by operand 2
4610 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4611 (define_expand "xststdcqp_<mode>"
4615 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4616 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4619 (set (match_operand:SI 0 "register_operand" "=r")
4620 (eq:SI (match_dup 3)
4624 operands[3] = gen_reg_rtx (CCFPmode);
4627 ;; VSX Scalar Test Data Class Double- and Single-Precision
4628 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4629 ;; if any of the conditions tested by operand 2 are satisfied.
4630 ;; The gt and unordered bits are cleared to zero.)
4631 (define_expand "xststdc<Fvsx>"
4635 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4636 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4639 (set (match_operand:SI 0 "register_operand" "=r")
4640 (eq:SI (match_dup 3)
4644 operands[3] = gen_reg_rtx (CCFPmode);
4645 operands[4] = CONST0_RTX (SImode);
4648 ;; The VSX Scalar Test Negative Quad-Precision
4649 (define_expand "xststdcnegqp_<mode>"
4653 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4657 (set (match_operand:SI 0 "register_operand" "=r")
4658 (lt:SI (match_dup 2)
4662 operands[2] = gen_reg_rtx (CCFPmode);
4665 ;; The VSX Scalar Test Negative Double- and Single-Precision
4666 (define_expand "xststdcneg<Fvsx>"
4670 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4674 (set (match_operand:SI 0 "register_operand" "=r")
4675 (lt:SI (match_dup 2)
4679 operands[2] = gen_reg_rtx (CCFPmode);
4680 operands[3] = CONST0_RTX (SImode);
4683 (define_insn "*xststdcqp_<mode>"
4684 [(set (match_operand:CCFP 0 "" "=y")
4687 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4688 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4692 "xststdcqp %0,%1,%2"
4693 [(set_attr "type" "fpcompare")])
4695 (define_insn "*xststdc<Fvsx>"
4696 [(set (match_operand:CCFP 0 "" "=y")
4698 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4699 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4701 (match_operand:SI 3 "zero_constant" "j")))]
4703 "xststdc<Fvsx> %0,%x1,%2"
4704 [(set_attr "type" "fpcompare")])
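;; Hedged C examples of the test built-ins these expanders implement
;; (assuming -mcpu=power9; the DCMX mask bits are NaN=0x40, +inf=0x20,
;; -inf=0x10, +0=0x08, -0=0x04, +denormal=0x02, -denormal=0x01):
;;
;;	int is_nan_or_inf (double x)
;;	{ return scalar_test_data_class (x, 0x70); }
;;	int is_neg (double x)
;;	{ return scalar_test_neg (x); }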
4706 ;; VSX Vector Extract Exponent Double and Single Precision
4707 (define_insn "xvxexp<VSs>"
4708 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4710 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4713 "xvxexp<VSs> %x0,%x1"
4714 [(set_attr "type" "vecsimple")])
4716 ;; VSX Vector Extract Significand Double and Single Precision
4717 (define_insn "xvxsig<VSs>"
4718 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4720 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4723 "xvxsig<VSs> %x0,%x1"
4724 [(set_attr "type" "vecsimple")])
4726 ;; VSX Vector Insert Exponent Double and Single Precision
4727 (define_insn "xviexp<VSs>"
4728 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4730 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4731 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4734 "xviexp<VSs> %x0,%x1,%x2"
4735 [(set_attr "type" "vecsimple")])
4737 ;; VSX Vector Test Data Class Double and Single Precision
4738 ;; The corresponding elements of the result vector are all ones
4739 ;; if any of the conditions tested by operand 3 are satisfied.
4740 (define_insn "xvtstdc<VSs>"
4741 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4743 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4744 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4745 UNSPEC_VSX_VTSTDC))]
4747 "xvtstdc<VSs> %x0,%x1,%2"
4748 [(set_attr "type" "vecsimple")])
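;; Hedged C examples of the vector forms (assuming <altivec.h> and
;; -mcpu=power9; see the GCC manual for vec_extract_exp and
;; vec_test_data_class):
;;
;;	#include <altivec.h>
;;	vector unsigned int exps (vector float v)
;;	{ return vec_extract_exp (v); }		/* xvxexpsp */
;;	vector bool int denorm (vector float v)
;;	{ return vec_test_data_class (v, 0x03); }	/* +/- denormal */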
4750 ;; ISA 3.0 String Operations Support
4752 ;; Compare vectors producing a vector result and a predicate, setting CR6
4753 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
4754 ;; v4si modes.  There is no need to match v4sf, v2df, or v2di modes
4755 ;; because those are expanded to use Power8 instructions.
4757 (define_insn "*vsx_ne_<mode>_p"
4758 [(set (reg:CC CR6_REGNO)
4760 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4761 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4763 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4764 (ne:VSX_EXTRACT_I (match_dup 1)
4767 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4768 [(set_attr "type" "vecsimple")])
4770 (define_insn "*vector_nez_<mode>_p"
4771 [(set (reg:CC CR6_REGNO)
4772 (unspec:CC [(unspec:VI
4773 [(match_operand:VI 1 "gpc_reg_operand" "v")
4774 (match_operand:VI 2 "gpc_reg_operand" "v")]
4777 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4778 (unspec:VI [(match_dup 1)
4782 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4783 [(set_attr "type" "vecsimple")])
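;; A minimal C sketch of the compare-not-equal-or-zero built-in that uses
;; this pattern (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	vector bool char ne_or_zero (vector signed char a, vector signed char b)
;;	{
;;	  return vec_cmpnez (a, b);	/* vcmpnezb */
;;	}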
4785 ;; Return first position of match between vectors
4786 (define_expand "first_match_index_<mode>"
4787 [(match_operand:SI 0 "register_operand")
4788 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4789 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4790 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4795 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4796 rtx not_result = gen_reg_rtx (<MODE>mode);
4798 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4800 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4802 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4804 if (<MODE>mode == V16QImode)
4805 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4808 rtx tmp = gen_reg_rtx (SImode);
4809 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4810 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
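;; A hedged C example of the built-in this expander implements (assuming
;; <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	int first_match (vector signed char a, vector signed char b)
;;	{
;;	  return vec_first_match_index (a, b);
;;	}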
4815 ;; Return first position of match between vectors or end of string (EOS)
4816 (define_expand "first_match_or_eos_index_<mode>"
4817 [(match_operand:SI 0 "register_operand")
4818 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4819 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4820 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4824 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4825 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4826 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4827 rtx and_result = gen_reg_rtx (<MODE>mode);
4828 rtx result = gen_reg_rtx (<MODE>mode);
4829 rtx vzero = gen_reg_rtx (<MODE>mode);
4831 /* Vector with zeros in elements that correspond to zeros in operands. */
4832 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4833 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4834 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4835 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4837   /* Vector with ones in elements that do not match.  */
4838 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4841 /* Create vector with ones in elements where there was a zero in one of
4842 the source elements or the elements that match. */
4843 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4844 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4846 if (<MODE>mode == V16QImode)
4847 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4850 rtx tmp = gen_reg_rtx (SImode);
4851 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4852 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4857 ;; Return first position of mismatch between vectors
4858 (define_expand "first_mismatch_index_<mode>"
4859 [(match_operand:SI 0 "register_operand")
4860 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4861 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4862 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4866 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4868 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4870 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4872 if (<MODE>mode == V16QImode)
4873 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4876 rtx tmp = gen_reg_rtx (SImode);
4877 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4878 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4883 ;; Return first position of mismatch between vectors or end of string (EOS)
4884 (define_expand "first_mismatch_or_eos_index_<mode>"
4885 [(match_operand:SI 0 "register_operand")
4886 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4887 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4888 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4892 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4893 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4894 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4895 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4896 rtx and_result = gen_reg_rtx (<MODE>mode);
4897 rtx result = gen_reg_rtx (<MODE>mode);
4898 rtx vzero = gen_reg_rtx (<MODE>mode);
4900 /* Vector with zeros in elements that correspond to zeros in operands. */
4901 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4903 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4904 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4905 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4907   /* Vector with ones in elements that match.  */
4908 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4910 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4912 /* Create vector with ones in elements where there was a zero in one of
4913 the source elements or the elements did not match. */
4914 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4915 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4917 if (<MODE>mode == V16QImode)
4918 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4921 rtx tmp = gen_reg_rtx (SImode);
4922 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4923 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
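;; Hedged C examples of the remaining built-ins in this family (assuming
;; <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	int f1 (vector signed char a, vector signed char b)
;;	{ return vec_first_match_or_eos_index (a, b); }
;;	int f2 (vector signed char a, vector signed char b)
;;	{ return vec_first_mismatch_index (a, b); }
;;	int f3 (vector signed char a, vector signed char b)
;;	{ return vec_first_mismatch_or_eos_index (a, b); }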
4928 ;; Load VSX Vector with Length
4929 (define_expand "lxvl"
4931 (ashift:DI (match_operand:DI 2 "register_operand")
4933 (set (match_operand:V16QI 0 "vsx_register_operand")
4935 [(match_operand:DI 1 "gpc_reg_operand")
4936 (mem:V16QI (match_dup 1))
4939 "TARGET_P9_VECTOR && TARGET_64BIT"
4941 operands[3] = gen_reg_rtx (DImode);
4944 (define_insn "*lxvl"
4945 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4947 [(match_operand:DI 1 "gpc_reg_operand" "b")
4948 (mem:V16QI (match_dup 1))
4949 (match_operand:DI 2 "register_operand" "r")]
4951 "TARGET_P9_VECTOR && TARGET_64BIT"
4953 [(set_attr "type" "vecload")])
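;; A hedged C example of the load-with-length built-in (assuming
;; <altivec.h>, -mcpu=power9 and -m64; vec_xl_len_r is the byte-reversed
;; variant handled by the xl_len_r expander below):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;	vector unsigned char head (unsigned char *p, size_t n)
;;	{
;;	  return vec_xl_len (p, n);	/* loads only n bytes */
;;	}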
4955 (define_insn "lxvll"
4956 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4957 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4958 (mem:V16QI (match_dup 1))
4959 (match_operand:DI 2 "register_operand" "r")]
4963 [(set_attr "type" "vecload")])
4965 ;; Expand for builtin xl_len_r
4966 (define_expand "xl_len_r"
4967 [(match_operand:V16QI 0 "vsx_register_operand")
4968 (match_operand:DI 1 "register_operand")
4969 (match_operand:DI 2 "register_operand")]
4972 rtx shift_mask = gen_reg_rtx (V16QImode);
4973 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4974 rtx tmp = gen_reg_rtx (DImode);
4976 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4977 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4978 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4979 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4984 (define_insn "stxvll"
4985 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4986 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4987 (mem:V16QI (match_dup 1))
4988 (match_operand:DI 2 "register_operand" "r")]
4992 [(set_attr "type" "vecstore")])
4994 ;; Store VSX Vector with Length
4995 (define_expand "stxvl"
4997 (ashift:DI (match_operand:DI 2 "register_operand")
4999 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5001 [(match_operand:V16QI 0 "vsx_register_operand")
5002 (mem:V16QI (match_dup 1))
5005 "TARGET_P9_VECTOR && TARGET_64BIT"
5007 operands[3] = gen_reg_rtx (DImode);
5010 (define_insn "*stxvl"
5011 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5013 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5014 (mem:V16QI (match_dup 1))
5015 (match_operand:DI 2 "register_operand" "r")]
5017 "TARGET_P9_VECTOR && TARGET_64BIT"
5019 [(set_attr "type" "vecstore")])
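;; A hedged C example of the store-with-length built-in (assuming
;; <altivec.h>, -mcpu=power9 and -m64):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;	void store_head (vector unsigned char v, unsigned char *p, size_t n)
;;	{
;;	  vec_xst_len (v, p, n);	/* stores only n bytes */
;;	}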
5021 ;; Expand for builtin xst_len_r
5022 (define_expand "xst_len_r"
5023 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5024 (match_operand:DI 1 "register_operand" "b")
5025 (match_operand:DI 2 "register_operand" "r")]
5028 rtx shift_mask = gen_reg_rtx (V16QImode);
5029 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5030 rtx tmp = gen_reg_rtx (DImode);
5032 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5033 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5035 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5036 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5040 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5041 (define_insn "vcmpneb"
5042 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5044 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5045 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5048 [(set_attr "type" "vecsimple")])
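;; A minimal C sketch of the compare-not-equal built-in backed by this insn
;; (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	vector bool char ne (vector signed char a, vector signed char b)
;;	{
;;	  return vec_cmpne (a, b);	/* vcmpneb */
;;	}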
5050 ;; Vector Compare Not Equal or Zero Byte
5051 (define_insn "vcmpnezb"
5052 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5054 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5055 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5059 [(set_attr "type" "vecsimple")])
5061 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5062 (define_insn "vcmpneh"
5063 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5065 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5066 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5069 [(set_attr "type" "vecsimple")])
5071 ;; Vector Compare Not Equal or Zero Half Word
5072 (define_insn "vcmpnezh"
5073 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5074 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5075 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5079 [(set_attr "type" "vecsimple")])
5081 ;; Vector Compare Not Equal Word (specified/not+eq:)
5082 (define_insn "vcmpnew"
5083 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5085 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5086 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5089 [(set_attr "type" "vecsimple")])
5091 ;; Vector Compare Not Equal or Zero Word
5092 (define_insn "vcmpnezw"
5093 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5094 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5095 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5099 [(set_attr "type" "vecsimple")])
5101 ;; Vector Count Leading Zero Least-Significant Bits Byte
5102 (define_insn "vclzlsbb"
5103 [(set (match_operand:SI 0 "register_operand" "=r")
5105 [(match_operand:V16QI 1 "altivec_register_operand" "v")]
5109 [(set_attr "type" "vecsimple")])
5111 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5112 (define_insn "vctzlsbb_<mode>"
5113 [(set (match_operand:SI 0 "register_operand" "=r")
5115 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5119 [(set_attr "type" "vecsimple")])
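;; Hedged C examples of the count leading/trailing zero least-significant
;; bits built-ins (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	int lead (vector unsigned char v)  { return vec_cntlz_lsbb (v); }
;;	int trail (vector unsigned char v) { return vec_cnttz_lsbb (v); }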
5121 ;; Vector Extract Unsigned Byte Left-Indexed
5122 (define_insn "vextublx"
5123 [(set (match_operand:SI 0 "register_operand" "=r")
5125 [(match_operand:SI 1 "register_operand" "r")
5126 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5130 [(set_attr "type" "vecsimple")])
5132 ;; Vector Extract Unsigned Byte Right-Indexed
5133 (define_insn "vextubrx"
5134 [(set (match_operand:SI 0 "register_operand" "=r")
5136 [(match_operand:SI 1 "register_operand" "r")
5137 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5141 [(set_attr "type" "vecsimple")])
5143 ;; Vector Extract Unsigned Half Word Left-Indexed
5144 (define_insn "vextuhlx"
5145 [(set (match_operand:SI 0 "register_operand" "=r")
5147 [(match_operand:SI 1 "register_operand" "r")
5148 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5152 [(set_attr "type" "vecsimple")])
5154 ;; Vector Extract Unsigned Half Word Right-Indexed
5155 (define_insn "vextuhrx"
5156 [(set (match_operand:SI 0 "register_operand" "=r")
5158 [(match_operand:SI 1 "register_operand" "r")
5159 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5163 [(set_attr "type" "vecsimple")])
5165 ;; Vector Extract Unsigned Word Left-Indexed
5166 (define_insn "vextuwlx"
5167 [(set (match_operand:SI 0 "register_operand" "=r")
5169 [(match_operand:SI 1 "register_operand" "r")
5170 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5174 [(set_attr "type" "vecsimple")])
5176 ;; Vector Extract Unsigned Word Right-Indexed
5177 (define_insn "vextuwrx"
5178 [(set (match_operand:SI 0 "register_operand" "=r")
5180 [(match_operand:SI 1 "register_operand" "r")
5181 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5185 [(set_attr "type" "vecsimple")])
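;; These left/right-indexed extracts implement vec_extract with a variable
;; index on ISA 3.0; an illustrative C sketch (assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	int get (vector int v, int i)
;;	{
;;	  return vec_extract (v, i);	/* vextuwlx / vextuwrx */
;;	}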
5187 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5188 ;; endian version needs to adjust the byte number, and the V4SI element in
;; the insert.
5190 (define_insn "extract4b"
5191 [(set (match_operand:V2DI 0 "vsx_register_operand")
5192 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5193 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5194 UNSPEC_XXEXTRACTUW))]
5197 if (!VECTOR_ELT_ORDER_BIG)
5198 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5200 return "xxextractuw %x0,%x1,%2";
5203 (define_expand "insert4b"
5204 [(set (match_operand:V16QI 0 "vsx_register_operand")
5205 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5206 (match_operand:V16QI 2 "vsx_register_operand")
5207 (match_operand:QI 3 "const_0_to_12_operand")]
5211 if (!VECTOR_ELT_ORDER_BIG)
5213 rtx op1 = operands[1];
5214 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5215 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5216 operands[1] = v4si_tmp;
5217 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5221 (define_insn "*insert4b_internal"
5222 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5223 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5224 (match_operand:V16QI 2 "vsx_register_operand" "0")
5225 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5228 "xxinsertw %x0,%x1,%3"
5229 [(set_attr "type" "vecperm")])
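;; Hedged C examples of the 4-byte insert/extract built-ins (assuming
;; <altivec.h> and -mcpu=power9; see the GCC manual for vec_extract4b and
;; vec_insert4b):
;;
;;	#include <altivec.h>
;;	vector unsigned long long take4 (vector unsigned char v)
;;	{ return vec_extract4b (v, 4); }	/* xxextractuw */
;;	vector unsigned char put4 (vector signed int w, vector unsigned char v)
;;	{ return vec_insert4b (w, v, 4); }	/* xxinsertw */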
5232 ;; Generate vector extract four float 32 values from left four elements
5233 ;; of eight element vector of float 16 values.
5234 (define_expand "vextract_fp_from_shorth"
5235 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5236 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5237 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5240 int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5244 rtx mask = gen_reg_rtx (V16QImode);
5245 rtx tmp = gen_reg_rtx (V16QImode);
5248 for (i = 0; i < 16; i++)
5249 rvals[i] = GEN_INT (vals[i]);
5251 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5252 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5253 src half words 0,1,2,3 for the conversion instruction. */
5254 v = gen_rtvec_v (16, rvals);
5255 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5256 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5257 operands[1], mask));
5258 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5262 ;; Generate vector extract four float 32 values from right four elements
5263 ;; of eight element vector of float 16 values.
5264 (define_expand "vextract_fp_from_shortl"
5265 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5266 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5267 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5270 int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5273 rtx mask = gen_reg_rtx (V16QImode);
5274 rtx tmp = gen_reg_rtx (V16QImode);
5277 for (i = 0; i < 16; i++)
5278 rvals[i] = GEN_INT (vals[i]);
5280 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5281 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5282 src half words 4,5,6,7 for the conversion instruction. */
5283 v = gen_rtvec_v (16, rvals);
5284 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5285 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5286 operands[1], mask));
5287 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
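;; Hedged C examples of the F16-to-F32 extract built-ins these expanders
;; implement (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	vector float left4 (vector unsigned short v)
;;	{ return vec_extract_fp32_from_shorth (v); }
;;	vector float right4 (vector unsigned short v)
;;	{ return vec_extract_fp32_from_shortl (v); }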
5291 ;; Support for ISA 3.0 vector byte reverse
5293 ;; Swap all bytes within a vector
5294 (define_insn "p9_xxbrq_v1ti"
5295 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5296 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5299 [(set_attr "type" "vecperm")])
5301 (define_expand "p9_xxbrq_v16qi"
5302 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5303 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5306 rtx op0 = gen_reg_rtx (V1TImode);
5307 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5308 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5309 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5313 ;; Swap all bytes in each 64-bit element
5314 (define_insn "p9_xxbrd_v2di"
5315 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5316 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5319 [(set_attr "type" "vecperm")])
5321 (define_expand "p9_xxbrd_v2df"
5322 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5323 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5326 rtx op0 = gen_reg_rtx (V2DImode);
5327 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5328 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5329 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5333 ;; Swap all bytes in each 32-bit element
5334 (define_insn "p9_xxbrw_v4si"
5335 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5336 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5339 [(set_attr "type" "vecperm")])
5341 (define_expand "p9_xxbrw_v4sf"
5342 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5343 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5346 rtx op0 = gen_reg_rtx (V4SImode);
5347 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5348 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5349 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5353 ;; Swap all bytes in each element of vector
5354 (define_expand "revb_<mode>"
5355 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5356 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5359 if (TARGET_P9_VECTOR)
5360 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5363 /* Want to have the elements in reverse order relative
5364      to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
5366   rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5367 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5374 ;; Reversing bytes in vector char is just a NOP.
5375 (define_expand "revb_v16qi"
5376 [(set (match_operand:V16QI 0 "vsx_register_operand")
5377 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5380 emit_move_insn (operands[0], operands[1]);
5384 ;; Swap all bytes in each 16-bit element
5385 (define_insn "p9_xxbrh_v8hi"
5386 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5387 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5390 [(set_attr "type" "vecperm")])
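;; A minimal C sketch of the byte-reverse built-in served by this section
;; (assuming <altivec.h>; xxbr[hwdq] on ISA 3.0, a vperm otherwise):
;;
;;	#include <altivec.h>
;;	vector unsigned int byteswap (vector unsigned int v)
;;	{
;;	  return vec_revb (v);
;;	}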
5393 ;; Operand numbers for the following peephole2
5395 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5396 (SFBOOL_TMP_VSX 1) ;; vector temporary
5397 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5398 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5399 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5400 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5401 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5402 (SFBOOL_SHL_D 7) ;; shift left dest
5403 (SFBOOL_SHL_A 8) ;; shift left arg
5404 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5405 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5406 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5407 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5408 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5410 ;; Attempt to optimize some common GLIBC operations using logical operations to
5411 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5412 ;; after macro expansion that looks like:
;;	typedef union
;;	{
;;	  float value;
;;	  unsigned int word;
5417 ;;	} ieee_float_shape_type;
;;
;;	do {
5423 ;;	  ieee_float_shape_type gf_u;
5424 ;;	  gf_u.value = (t1);
5425 ;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
5429 ;;	  ieee_float_shape_type sf_u;
5430 ;;	  sf_u.word = (is & 0xfffff000);
5431 ;;	  (t1) = sf_u.value;
;;	} while (0);
5435 ;; This would result in two direct move operations (convert to memory format,
5436 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5437 ;; scalar format). With this peephole, we eliminate the direct move to the
5438 ;; GPR, and instead move the integer mask value to the vector register after a
5439 ;; shift and do the VSX logical operation.
5441 ;; The insns for dealing with SFmode in GPR registers looks like:
5442 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5444 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5446 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5448 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5450 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5452 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5455 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5456 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5458 ;; MFVSRWZ (aka zero_extend)
5459 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5461 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5463 ;; AND/IOR/XOR operation on int
5464 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5465 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5466 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5469 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5470 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5474 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5475 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5477 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5478 /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
5479 to compare registers, when the mode is different. */
5480 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5481 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5482 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5483 && (REG_P (operands[SFBOOL_BOOL_A2])
5484 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5485 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5486 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5487 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5488 || (REG_P (operands[SFBOOL_BOOL_A2])
5489 && REGNO (operands[SFBOOL_MFVSR_D])
5490 == REGNO (operands[SFBOOL_BOOL_A2])))
5491 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5492 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5493 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5494 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5495 [(set (match_dup SFBOOL_TMP_GPR)
5496 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5499 (set (match_dup SFBOOL_TMP_VSX_DI)
5500 (match_dup SFBOOL_TMP_GPR))
5502 (set (match_dup SFBOOL_MTVSR_D_V4SF)
5503 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5504 (match_dup SFBOOL_TMP_VSX)))]
5506 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5507 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5508 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5509 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5510 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5511 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5513 if (CONST_INT_P (bool_a2))
5515 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5516 emit_move_insn (tmp_gpr, bool_a2);
5517 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5521 int regno_bool_a1 = REGNO (bool_a1);
5522 int regno_bool_a2 = REGNO (bool_a2);
5523 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5524 ? regno_bool_a2 : regno_bool_a1);
5525 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5528 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5529 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5530 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);