gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Iterator for both scalar and vector floating point types supported by VSX
  22 (define_mode_iterator VSX_B [DF V4SF V2DF])
  23
  24 ;; Iterator for the 2 64-bit vector types
  25 (define_mode_iterator VSX_D [V2DF V2DI])
  26
  27 ;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
  28 ;; lxvd2x to properly handle swapping words on little endian
  29 (define_mode_iterator VSX_LE [V2DF V2DI V1TI])
  30
  31 ;; Mode iterator to handle swapping words on little endian for the 128-bit
  32 ;; types that goes in a single vector register.
  33 (define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
  34                                   (TF   "FLOAT128_VECTOR_P (TFmode)")
  35                                   (TI   "TARGET_VSX_TIMODE")])
  36
  37 ;; Iterator for the 2 32-bit vector types
  38 (define_mode_iterator VSX_W [V4SF V4SI])
  39
  40 ;; Iterator for the DF types
  41 (define_mode_iterator VSX_DF [V2DF DF])
  42
  43 ;; Iterator for vector floating point types supported by VSX
  44 (define_mode_iterator VSX_F [V4SF V2DF])
  45
  46 ;; Iterator for logical types supported by VSX
  47 (define_mode_iterator VSX_L [V16QI
  48                              V8HI
  49                              V4SI
  50                              V2DI
  51                              V4SF
  52                              V2DF
  53                              V1TI
  54                              TI
  55                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  56                              (TF        "FLOAT128_VECTOR_P (TFmode)")])
  57
  58 ;; Iterator for memory moves.
  59 (define_mode_iterator VSX_M [V16QI
  60                              V8HI
  61                              V4SI
  62                              V2DI
  63                              V4SF
  64                              V2DF
  65                              V1TI
  66                              (KF        "FLOAT128_VECTOR_P (KFmode)")
  67                              (TF        "FLOAT128_VECTOR_P (TFmode)")
  68                              (TI        "TARGET_VSX_TIMODE")])
  69
  70 ;; Map into the appropriate load/store name based on the type
  71 (define_mode_attr VSm  [(V16QI "vw4")
  72                         (V8HI  "vw4")
  73                         (V4SI  "vw4")
  74                         (V4SF  "vw4")
  75                         (V2DF  "vd2")
  76                         (V2DI  "vd2")
  77                         (DF    "d")
  78                         (TF    "vd2")
  79                         (KF    "vd2")
  80                         (V1TI  "vd2")
  81                         (TI    "vd2")])
  82
  83 ;; Map into the appropriate suffix based on the type
  84 (define_mode_attr VSs   [(V16QI "sp")
  85                          (V8HI  "sp")
  86                          (V4SI  "sp")
  87                          (V4SF  "sp")
  88                          (V2DF  "dp")
  89                          (V2DI  "dp")
  90                          (DF    "dp")
  91                          (SF    "sp")
  92                          (TF    "dp")
  93                          (KF    "dp")
  94                          (V1TI  "dp")
  95                          (TI    "dp")])
  96
  97 ;; Map the register class used
  98 (define_mode_attr VSr   [(V16QI "v")
  99                          (V8HI  "v")
 100                          (V4SI  "v")
 101                          (V4SF  "wf")
 102                          (V2DI  "wd")
 103                          (V2DF  "wd")
 104                          (DI    "wi")
 105                          (DF    "ws")
 106                          (SF    "ww")
 107                          (TF    "wp")
 108                          (KF    "wq")
 109                          (V1TI  "v")
 110                          (TI    "wt")])
 111
 112 ;; Map the register class used for float<->int conversions (floating point side)
 113 ;; VSr2 is the preferred register class, VSr3 is any register class that will
 114 ;; hold the data
 115 (define_mode_attr VSr2  [(V2DF  "wd")
 116                          (V4SF  "wf")
 117                          (DF    "ws")
 118                          (SF    "ww")
 119                          (DI    "wi")])
 120
 121 (define_mode_attr VSr3  [(V2DF  "wa")
 122                          (V4SF  "wa")
 123                          (DF    "ws")
 124                          (SF    "ww")
 125                          (DI    "wi")])
 126
 127 ;; Map the register class for sp<->dp float conversions, destination
 128 (define_mode_attr VSr4  [(SF    "ws")
 129                          (DF    "f")
 130                          (V2DF  "wd")
 131                          (V4SF  "v")])
 132
 133 ;; Map the register class for sp<->dp float conversions, source
 134 (define_mode_attr VSr5  [(SF    "ws")
 135                          (DF    "f")
 136                          (V2DF  "v")
 137                          (V4SF  "wd")])
 138
 139 ;; The VSX register class that a type can occupy, even if it is not the
 140 ;; preferred register class (VSr is the preferred register class that will get
 141 ;; allocated first).
 142 (define_mode_attr VSa   [(V16QI "wa")
 143                          (V8HI  "wa")
 144                          (V4SI  "wa")
 145                          (V4SF  "wa")
 146                          (V2DI  "wa")
 147                          (V2DF  "wa")
 148                          (DI    "wi")
 149                          (DF    "ws")
 150                          (SF    "ww")
 151                          (V1TI  "wa")
 152                          (TI    "wt")
 153                          (TF    "wp")
 154                          (KF    "wq")])
 155
 156 ;; Same size integer type for floating point data
 157 (define_mode_attr VSi [(V4SF  "v4si")
 158                        (V2DF  "v2di")
 159                        (DF    "di")])
 160
 161 (define_mode_attr VSI [(V4SF  "V4SI")
 162                        (V2DF  "V2DI")
 163                        (DF    "DI")])
 164
 165 ;; Word size for same size conversion
 166 (define_mode_attr VSc [(V4SF "w")
 167                        (V2DF "d")
 168                        (DF   "d")])
 169
 170 ;; Map into either s or v, depending on whether this is a scalar or vector
 171 ;; operation
 172 (define_mode_attr VSv   [(V16QI "v")
 173                          (V8HI  "v")
 174                          (V4SI  "v")
 175                          (V4SF  "v")
 176                          (V2DI  "v")
 177                          (V2DF  "v")
 178                          (V1TI  "v")
 179                          (DF    "s")
 180                          (KF    "v")])
 181
 182 ;; Appropriate type for add ops (and other simple FP ops)
 183 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 184                                  (V4SF "vecfloat")
 185                                  (DF   "fp")])
 186
 187 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 188                                    (V4SF "fp_addsub_s")
 189                                    (DF   "fp_addsub_d")])
 190
 191 ;; Appropriate type for multiply ops
 192 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 193                                  (V4SF "vecfloat")
 194                                  (DF   "dmul")])
 195
 196 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 197                                  (V4SF "fp_mul_s")
 198                                  (DF   "fp_mul_d")])
 199
 200 ;; Appropriate type for divide ops.
 201 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 202                                  (V4SF "vecfdiv")
 203                                  (DF   "ddiv")])
 204
 205 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 206                                  (V4SF "fp_div_s")
 207                                  (DF   "fp_div_d")])
 208
 209 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 210 ;; the scalar sqrt
 211 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 212                                  (V4SF "ssqrt")
 213                                  (DF   "dsqrt")])
 214
 215 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 216                                  (V4SF "fp_sqrt_s")
 217                                  (DF   "fp_sqrt_d")])
 218
 219 ;; Iterator and modes for sp<->dp conversions
 220 ;; Because scalar SF values are represented internally as double, use the
 221 ;; V4SF type to represent this than SF.
 222 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 223
 224 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 225                                (V4SF    "V2DF")
 226                                (V2DF    "V4SF")])
 227
 228 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 229                                 (V4SF   "xvcvspdp")
 230                                 (V2DF   "xvcvdpsp")])
 231
 232 (define_mode_attr VS_spdp_type [(DF     "fp")
 233                                 (V4SF   "vecdouble")
 234                                 (V2DF   "vecdouble")])
 235
 236 ;; Map the scalar mode for a vector type
 237 (define_mode_attr VS_scalar [(V1TI      "TI")
 238                              (V2DF      "DF")
 239                              (V2DI      "DI")
 240                              (V4SF      "SF")
 241                              (V4SI      "SI")
 242                              (V8HI      "HI")
 243                              (V16QI     "QI")])
 244
 245 ;; Map to a double-sized vector mode
 246 (define_mode_attr VS_double [(V4SI      "V8SI")
 247                              (V4SF      "V8SF")
 248                              (V2DI      "V4DI")
 249                              (V2DF      "V4DF")
 250                              (V1TI      "V2TI")])
 251
 252 ;; Map register class for 64-bit element in 128-bit vector for direct moves
 253 ;; to/from gprs
 254 (define_mode_attr VS_64dm [(V2DF        "wk")
 255                            (V2DI        "wj")])
 256
 257 ;; Map register class for 64-bit element in 128-bit vector for normal register
 258 ;; to register moves
 259 (define_mode_attr VS_64reg [(V2DF       "ws")
 260                             (V2DI       "wi")])
 261
 262 ;; Iterators for loading constants with xxspltib
 263 (define_mode_iterator VSINT_84  [V4SI V2DI DI])
 264 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
 265
 266 ;; Constants for creating unspecs
 267 (define_c_enum "unspec"
 268   [UNSPEC_VSX_CONCAT
 269    UNSPEC_VSX_CVDPSXWS
 270    UNSPEC_VSX_CVDPUXWS
 271    UNSPEC_VSX_CVSPDP
 272    UNSPEC_VSX_CVSPDPN
 273    UNSPEC_VSX_CVDPSPN
 274    UNSPEC_VSX_CVSXWDP
 275    UNSPEC_VSX_CVUXWDP
 276    UNSPEC_VSX_CVSXDSP
 277    UNSPEC_VSX_CVUXDSP
 278    UNSPEC_VSX_CVSPSXDS
 279    UNSPEC_VSX_CVSPUXDS
 280    UNSPEC_VSX_TDIV
 281    UNSPEC_VSX_TSQRT
 282    UNSPEC_VSX_SET
 283    UNSPEC_VSX_ROUND_I
 284    UNSPEC_VSX_ROUND_IC
 285    UNSPEC_VSX_SLDWI
 286    UNSPEC_VSX_XXSPLTW
 287    UNSPEC_VSX_XXSPLTD
 288    UNSPEC_VSX_DIVSD
 289    UNSPEC_VSX_DIVUD
 290    UNSPEC_VSX_MULSD
 291    UNSPEC_VSX_XVCVSXDDP
 292    UNSPEC_VSX_XVCVUXDDP
 293    UNSPEC_VSX_XVCVDPSXDS
 294    UNSPEC_VSX_XVCVDPUXDS
 295    UNSPEC_VSX_SIGN_EXTEND
 296    UNSPEC_P9_MEMORY
 297   ])
 298
 299 ;; VSX moves
 300
 301 ;; The patterns for LE permuted loads and stores come before the general
 302 ;; VSX moves so they match first.
 303 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 304   [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
 305         (match_operand:VSX_LE 1 "memory_operand" "Z"))]
 306   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 307   "#"
 308   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 309   [(set (match_dup 2)
 310         (vec_select:<MODE>
 311           (match_dup 1)
 312           (parallel [(const_int 1) (const_int 0)])))
 313    (set (match_dup 0)
 314         (vec_select:<MODE>
 315           (match_dup 2)
 316           (parallel [(const_int 1) (const_int 0)])))]
 317   "
 318 {
 319   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 320                                        : operands[0];
 321 }
 322   "
 323   [(set_attr "type" "vecload")
 324    (set_attr "length" "8")])
 325
 326 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 327   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
 328         (match_operand:VSX_W 1 "memory_operand" "Z"))]
 329   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 330   "#"
 331   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 332   [(set (match_dup 2)
 333         (vec_select:<MODE>
 334           (match_dup 1)
 335           (parallel [(const_int 2) (const_int 3)
 336                      (const_int 0) (const_int 1)])))
 337    (set (match_dup 0)
 338         (vec_select:<MODE>
 339           (match_dup 2)
 340           (parallel [(const_int 2) (const_int 3)
 341                      (const_int 0) (const_int 1)])))]
 342   "
 343 {
 344   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 345                                        : operands[0];
 346 }
 347   "
 348   [(set_attr "type" "vecload")
 349    (set_attr "length" "8")])
 350
 351 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 352   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 353         (match_operand:V8HI 1 "memory_operand" "Z"))]
 354   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 355   "#"
 356   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 357   [(set (match_dup 2)
 358         (vec_select:V8HI
 359           (match_dup 1)
 360           (parallel [(const_int 4) (const_int 5)
 361                      (const_int 6) (const_int 7)
 362                      (const_int 0) (const_int 1)
 363                      (const_int 2) (const_int 3)])))
 364    (set (match_dup 0)
 365         (vec_select:V8HI
 366           (match_dup 2)
 367           (parallel [(const_int 4) (const_int 5)
 368                      (const_int 6) (const_int 7)
 369                      (const_int 0) (const_int 1)
 370                      (const_int 2) (const_int 3)])))]
 371   "
 372 {
 373   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 374                                        : operands[0];
 375 }
 376   "
 377   [(set_attr "type" "vecload")
 378    (set_attr "length" "8")])
 379
 380 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 381   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 382         (match_operand:V16QI 1 "memory_operand" "Z"))]
 383   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 384   "#"
 385   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 386   [(set (match_dup 2)
 387         (vec_select:V16QI
 388           (match_dup 1)
 389           (parallel [(const_int 8) (const_int 9)
 390                      (const_int 10) (const_int 11)
 391                      (const_int 12) (const_int 13)
 392                      (const_int 14) (const_int 15)
 393                      (const_int 0) (const_int 1)
 394                      (const_int 2) (const_int 3)
 395                      (const_int 4) (const_int 5)
 396                      (const_int 6) (const_int 7)])))
 397    (set (match_dup 0)
 398         (vec_select:V16QI
 399           (match_dup 2)
 400           (parallel [(const_int 8) (const_int 9)
 401                      (const_int 10) (const_int 11)
 402                      (const_int 12) (const_int 13)
 403                      (const_int 14) (const_int 15)
 404                      (const_int 0) (const_int 1)
 405                      (const_int 2) (const_int 3)
 406                      (const_int 4) (const_int 5)
 407                      (const_int 6) (const_int 7)])))]
 408   "
 409 {
 410   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 411                                        : operands[0];
 412 }
 413   "
 414   [(set_attr "type" "vecload")
 415    (set_attr "length" "8")])
 416
 417 (define_insn "*vsx_le_perm_store_<mode>"
 418   [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
 419         (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
 420   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 421   "#"
 422   [(set_attr "type" "vecstore")
 423    (set_attr "length" "12")])
 424
 425 (define_split
 426   [(set (match_operand:VSX_LE 0 "memory_operand" "")
 427         (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
 428   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 429   [(set (match_dup 2)
 430         (vec_select:<MODE>
 431           (match_dup 1)
 432           (parallel [(const_int 1) (const_int 0)])))
 433    (set (match_dup 0)
 434         (vec_select:<MODE>
 435           (match_dup 2)
 436           (parallel [(const_int 1) (const_int 0)])))]
 437 {
 438   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 439                                        : operands[1];
 440 })
 441
 442 ;; The post-reload split requires that we re-permute the source
 443 ;; register in case it is still live.
 444 (define_split
 445   [(set (match_operand:VSX_LE 0 "memory_operand" "")
 446         (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
 447   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 448   [(set (match_dup 1)
 449         (vec_select:<MODE>
 450           (match_dup 1)
 451           (parallel [(const_int 1) (const_int 0)])))
 452    (set (match_dup 0)
 453         (vec_select:<MODE>
 454           (match_dup 1)
 455           (parallel [(const_int 1) (const_int 0)])))
 456    (set (match_dup 1)
 457         (vec_select:<MODE>
 458           (match_dup 1)
 459           (parallel [(const_int 1) (const_int 0)])))]
 460   "")
 461
 462 (define_insn "*vsx_le_perm_store_<mode>"
 463   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 464         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
 465   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 466   "#"
 467   [(set_attr "type" "vecstore")
 468    (set_attr "length" "12")])
 469
 470 (define_split
 471   [(set (match_operand:VSX_W 0 "memory_operand" "")
 472         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 473   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 474   [(set (match_dup 2)
 475         (vec_select:<MODE>
 476           (match_dup 1)
 477           (parallel [(const_int 2) (const_int 3)
 478                      (const_int 0) (const_int 1)])))
 479    (set (match_dup 0)
 480         (vec_select:<MODE>
 481           (match_dup 2)
 482           (parallel [(const_int 2) (const_int 3)
 483                      (const_int 0) (const_int 1)])))]
 484 {
 485   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 486                                        : operands[1];
 487 })
 488
 489 ;; The post-reload split requires that we re-permute the source
 490 ;; register in case it is still live.
 491 (define_split
 492   [(set (match_operand:VSX_W 0 "memory_operand" "")
 493         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 494   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 495   [(set (match_dup 1)
 496         (vec_select:<MODE>
 497           (match_dup 1)
 498           (parallel [(const_int 2) (const_int 3)
 499                      (const_int 0) (const_int 1)])))
 500    (set (match_dup 0)
 501         (vec_select:<MODE>
 502           (match_dup 1)
 503           (parallel [(const_int 2) (const_int 3)
 504                      (const_int 0) (const_int 1)])))
 505    (set (match_dup 1)
 506         (vec_select:<MODE>
 507           (match_dup 1)
 508           (parallel [(const_int 2) (const_int 3)
 509                      (const_int 0) (const_int 1)])))]
 510   "")
 511
 512 (define_insn "*vsx_le_perm_store_v8hi"
 513   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 514         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 515   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 516   "#"
 517   [(set_attr "type" "vecstore")
 518    (set_attr "length" "12")])
 519
 520 (define_split
 521   [(set (match_operand:V8HI 0 "memory_operand" "")
 522         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 523   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 524   [(set (match_dup 2)
 525         (vec_select:V8HI
 526           (match_dup 1)
 527           (parallel [(const_int 4) (const_int 5)
 528                      (const_int 6) (const_int 7)
 529                      (const_int 0) (const_int 1)
 530                      (const_int 2) (const_int 3)])))
 531    (set (match_dup 0)
 532         (vec_select:V8HI
 533           (match_dup 2)
 534           (parallel [(const_int 4) (const_int 5)
 535                      (const_int 6) (const_int 7)
 536                      (const_int 0) (const_int 1)
 537                      (const_int 2) (const_int 3)])))]
 538 {
 539   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 540                                        : operands[1];
 541 })
 542
 543 ;; The post-reload split requires that we re-permute the source
 544 ;; register in case it is still live.
 545 (define_split
 546   [(set (match_operand:V8HI 0 "memory_operand" "")
 547         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 548   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 549   [(set (match_dup 1)
 550         (vec_select:V8HI
 551           (match_dup 1)
 552           (parallel [(const_int 4) (const_int 5)
 553                      (const_int 6) (const_int 7)
 554                      (const_int 0) (const_int 1)
 555                      (const_int 2) (const_int 3)])))
 556    (set (match_dup 0)
 557         (vec_select:V8HI
 558           (match_dup 1)
 559           (parallel [(const_int 4) (const_int 5)
 560                      (const_int 6) (const_int 7)
 561                      (const_int 0) (const_int 1)
 562                      (const_int 2) (const_int 3)])))
 563    (set (match_dup 1)
 564         (vec_select:V8HI
 565           (match_dup 1)
 566           (parallel [(const_int 4) (const_int 5)
 567                      (const_int 6) (const_int 7)
 568                      (const_int 0) (const_int 1)
 569                      (const_int 2) (const_int 3)])))]
 570   "")
 571
 572 (define_insn "*vsx_le_perm_store_v16qi"
 573   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
 574         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 575   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 576   "#"
 577   [(set_attr "type" "vecstore")
 578    (set_attr "length" "12")])
 579
 580 (define_split
 581   [(set (match_operand:V16QI 0 "memory_operand" "")
 582         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 583   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
 584   [(set (match_dup 2)
 585         (vec_select:V16QI
 586           (match_dup 1)
 587           (parallel [(const_int 8) (const_int 9)
 588                      (const_int 10) (const_int 11)
 589                      (const_int 12) (const_int 13)
 590                      (const_int 14) (const_int 15)
 591                      (const_int 0) (const_int 1)
 592                      (const_int 2) (const_int 3)
 593                      (const_int 4) (const_int 5)
 594                      (const_int 6) (const_int 7)])))
 595    (set (match_dup 0)
 596         (vec_select:V16QI
 597           (match_dup 2)
 598           (parallel [(const_int 8) (const_int 9)
 599                      (const_int 10) (const_int 11)
 600                      (const_int 12) (const_int 13)
 601                      (const_int 14) (const_int 15)
 602                      (const_int 0) (const_int 1)
 603                      (const_int 2) (const_int 3)
 604                      (const_int 4) (const_int 5)
 605                      (const_int 6) (const_int 7)])))]
 606 {
 607   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 608                                        : operands[1];
 609 })
 610
 611 ;; The post-reload split requires that we re-permute the source
 612 ;; register in case it is still live.
 613 (define_split
 614   [(set (match_operand:V16QI 0 "memory_operand" "")
 615         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 616   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
 617   [(set (match_dup 1)
 618         (vec_select:V16QI
 619           (match_dup 1)
 620           (parallel [(const_int 8) (const_int 9)
 621                      (const_int 10) (const_int 11)
 622                      (const_int 12) (const_int 13)
 623                      (const_int 14) (const_int 15)
 624                      (const_int 0) (const_int 1)
 625                      (const_int 2) (const_int 3)
 626                      (const_int 4) (const_int 5)
 627                      (const_int 6) (const_int 7)])))
 628    (set (match_dup 0)
 629         (vec_select:V16QI
 630           (match_dup 1)
 631           (parallel [(const_int 8) (const_int 9)
 632                      (const_int 10) (const_int 11)
 633                      (const_int 12) (const_int 13)
 634                      (const_int 14) (const_int 15)
 635                      (const_int 0) (const_int 1)
 636                      (const_int 2) (const_int 3)
 637                      (const_int 4) (const_int 5)
 638                      (const_int 6) (const_int 7)])))
 639    (set (match_dup 1)
 640         (vec_select:V16QI
 641           (match_dup 1)
 642           (parallel [(const_int 8) (const_int 9)
 643                      (const_int 10) (const_int 11)
 644                      (const_int 12) (const_int 13)
 645                      (const_int 14) (const_int 15)
 646                      (const_int 0) (const_int 1)
 647                      (const_int 2) (const_int 3)
 648                      (const_int 4) (const_int 5)
 649                      (const_int 6) (const_int 7)])))]
 650   "")
 651
 652 ;; Little endian word swapping for 128-bit types that are either scalars or the
 653 ;; special V1TI container class, which it is not appropriate to use vec_select
 654 ;; for the type.
 655 (define_insn "*vsx_le_permute_<mode>"
 656   [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
 657         (rotate:VSX_LE_128
 658          (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
 659          (const_int 64)))]
 660   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 661   "@
 662    xxpermdi %x0,%x1,%x1,2
 663    lxvd2x %x0,%y1
 664    stxvd2x %x1,%y0"
 665   [(set_attr "length" "4")
 666    (set_attr "type" "vecperm,vecload,vecstore")])
 667
 668 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
 669   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
 670         (rotate:VSX_LE_128
 671          (rotate:VSX_LE_128
 672           (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
 673           (const_int 64))
 674          (const_int 64)))]
 675   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 676   "@
 677    #
 678    xxlor %x0,%x1"
 679   ""
 680   [(set (match_dup 0) (match_dup 1))]
 681 {
 682   if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
 683     {
 684       emit_note (NOTE_INSN_DELETED);
 685       DONE;
 686     }
 687 }
 688   [(set_attr "length" "0,4")
 689    (set_attr "type" "vecsimple")])
 690
 691 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 692   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
 693         (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
 694   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 695   "#"
 696   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 697   [(set (match_dup 2)
 698         (rotate:VSX_LE_128 (match_dup 1)
 699                            (const_int 64)))
 700    (set (match_dup 0)
 701         (rotate:VSX_LE_128 (match_dup 2)
 702                            (const_int 64)))]
 703   "
 704 {
 705   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 706                                        : operands[0];
 707 }
 708   "
 709   [(set_attr "type" "vecload")
 710    (set_attr "length" "8")])
 711
 712 (define_insn "*vsx_le_perm_store_<mode>"
 713   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
 714         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
 715   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
 716   "#"
 717   [(set_attr "type" "vecstore")
 718    (set_attr "length" "12")])
 719
 720 (define_split
 721   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 722         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 723   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
 724   [(set (match_dup 2)
 725         (rotate:VSX_LE_128 (match_dup 1)
 726                            (const_int 64)))
 727    (set (match_dup 0)
 728         (rotate:VSX_LE_128 (match_dup 2)
 729                            (const_int 64)))]
 730 {
 731   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 732                                        : operands[0];
 733 })
 734
 735 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
 736 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
 737 ;; floating point are handled by the more generic swap elimination pass.
 738 (define_peephole2
 739   [(set (match_operand:TI 0 "vsx_register_operand" "")
 740         (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
 741                    (const_int 64)))
 742    (set (match_operand:TI 2 "vsx_register_operand" "")
 743         (rotate:TI (match_dup 0)
 744                    (const_int 64)))]
 745   "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
 746    && (rtx_equal_p (operands[0], operands[2])
 747        || peep2_reg_dead_p (2, operands[0]))"
 748    [(set (match_dup 2) (match_dup 1))])
 749
 750 ;; The post-reload split requires that we re-permute the source
 751 ;; register in case it is still live.
 752 (define_split
 753   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
 754         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
 755   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
 756   [(set (match_dup 1)
 757         (rotate:VSX_LE_128 (match_dup 1)
 758                            (const_int 64)))
 759    (set (match_dup 0)
 760         (rotate:VSX_LE_128 (match_dup 1)
 761                            (const_int 64)))
 762    (set (match_dup 1)
 763         (rotate:VSX_LE_128 (match_dup 1)
 764                            (const_int 64)))]
 765   "")
 766
 767 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 768 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
 769 (define_insn "xxspltib_v16qi"
 770   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 771         (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
 772   "TARGET_P9_VECTOR"
 773 {
 774   operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
 775   return "xxspltib %x0,%2";
 776 }
 777   [(set_attr "type" "vecperm")])
 778
 779 (define_insn "xxspltib_<mode>_nosplit"
 780   [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
 781         (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
 782   "TARGET_P9_VECTOR"
 783 {
 784   rtx op1 = operands[1];
 785   int value = 256;
 786   int num_insns = -1;
 787
 788   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 789       || num_insns != 1)
 790     gcc_unreachable ();
 791
 792   operands[2] = GEN_INT (value & 0xff);
 793   return "xxspltib %x0,%2";
 794 }
 795   [(set_attr "type" "vecperm")])
 796
 797 (define_insn_and_split "*xxspltib_<mode>_split"
 798   [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
 799         (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
 800   "TARGET_P9_VECTOR"
 801   "#"
 802   "&& 1"
 803   [(const_int 0)]
 804 {
 805   int value = 256;
 806   int num_insns = -1;
 807   rtx op0 = operands[0];
 808   rtx op1 = operands[1];
 809   rtx tmp = ((can_create_pseudo_p ())
 810              ? gen_reg_rtx (V16QImode)
 811              : gen_lowpart (V16QImode, op0));
 812
 813   if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
 814       || num_insns != 2)
 815     gcc_unreachable ();
 816
 817   emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
 818
 819   if (<MODE>mode == V2DImode)
 820     emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
 821
 822   else if (<MODE>mode == V4SImode)
 823     emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
 824
 825   else if (<MODE>mode == V8HImode)
 826     emit_insn (gen_altivec_vupkhsb  (op0, tmp));
 827
 828   else
 829     gcc_unreachable ();
 830
 831   DONE;
 832 }
 833   [(set_attr "type" "vecperm")
 834    (set_attr "length" "8")])
 835
 836
 837 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
 838 ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
 839 ;; all 1's, since the machine does not have to wait for the previous
 840 ;; instruction using the register being set (such as a store waiting on a slow
 841 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
 842
 843 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 844 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
 845 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
 846 (define_insn "*vsx_mov<mode>_64bit"
 847   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
 848                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
 849                 ?&r,       ??r,       ??Y,       ??r,       wo,        v,
 850                 ?<VSa>,    *r,        v,         ??r,       wZ,        v")
 851
 852         (match_operand:VSX_M 1 "input_operand"
 853                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
 854                 wQ,        Y,         r,         r,         wE,        jwM,
 855                 ?jwM,      jwM,       W,         W,         v,         wZ"))]
 856
 857   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
 858    && (register_operand (operands[0], <MODE>mode)
 859        || register_operand (operands[1], <MODE>mode))"
 860 {
 861   return rs6000_output_move_128bit (operands);
 862 }
 863   [(set_attr "type"
 864                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
 865                 store,     load,      store,     *,         vecsimple, vecsimple,
 866                 vecsimple, *,         *,         *,         vecstore,  vecload")
 867
 868    (set_attr "length"
 869                "4,         4,         4,         8,         4,         8,
 870                 8,         8,         8,         8,         4,         4,
 871                 4,         8,         20,        20,        4,         4")])
 872
 873 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
 874 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
 875 ;;              LVX (VMX)  STVX (VMX)
 876 (define_insn "*vsx_mov<mode>_32bit"
 877   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
 878                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       ??r,
 879                 wo,        v,         ?<VSa>,    *r,        v,         ??r,
 880                 wZ,        v")
 881
 882         (match_operand:VSX_M 1 "input_operand"
 883                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
 884                 wE,        jwM,       ?jwM,      jwM,       W,         W,
 885                 v,         wZ"))]
 886
 887   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
 888    && (register_operand (operands[0], <MODE>mode)
 889        || register_operand (operands[1], <MODE>mode))"
 890 {
 891   return rs6000_output_move_128bit (operands);
 892 }
 893   [(set_attr "type"
 894                "vecstore,  vecload,   vecsimple, load,      store,    *,
 895                 vecsimple, vecsimple, vecsimple, *,         *,        *,
 896                 vecstore,  vecload")
 897
 898    (set_attr "length"
 899                "4,         4,         4,         16,        16,        16,
 900                 4,         4,         4,         16,        20,        32,
 901                 4,         4")])
 902
 903 ;; Explicit  load/store expanders for the builtin functions
 904 (define_expand "vsx_load_<mode>"
 905   [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
 906         (match_operand:VSX_M 1 "memory_operand" ""))]
 907   "VECTOR_MEM_VSX_P (<MODE>mode)"
 908   "")
 909
 910 (define_expand "vsx_store_<mode>"
 911   [(set (match_operand:VSX_M 0 "memory_operand" "")
 912         (match_operand:VSX_M 1 "vsx_register_operand" ""))]
 913   "VECTOR_MEM_VSX_P (<MODE>mode)"
 914   "")
 915
 916 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
 917 ;; when you really want their element-reversing behavior.
 918 (define_insn "vsx_ld_elemrev_v2di"
 919   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
 920         (vec_select:V2DI
 921           (match_operand:V2DI 1 "memory_operand" "Z")
 922           (parallel [(const_int 1) (const_int 0)])))]
 923   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
 924   "lxvd2x %x0,%y1"
 925   [(set_attr "type" "vecload")])
 926
 927 (define_insn "vsx_ld_elemrev_v2df"
 928   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
 929         (vec_select:V2DF
 930           (match_operand:V2DF 1 "memory_operand" "Z")
 931           (parallel [(const_int 1) (const_int 0)])))]
 932   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
 933   "lxvd2x %x0,%y1"
 934   [(set_attr "type" "vecload")])
 935
 936 (define_insn "vsx_ld_elemrev_v4si"
 937   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
 938         (vec_select:V4SI
 939           (match_operand:V4SI 1 "memory_operand" "Z")
 940           (parallel [(const_int 3) (const_int 2)
 941                      (const_int 1) (const_int 0)])))]
 942   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
 943   "lxvw4x %x0,%y1"
 944   [(set_attr "type" "vecload")])
 945
 946 (define_insn "vsx_ld_elemrev_v4sf"
 947   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
 948         (vec_select:V4SF
 949           (match_operand:V4SF 1 "memory_operand" "Z")
 950           (parallel [(const_int 3) (const_int 2)
 951                      (const_int 1) (const_int 0)])))]
 952   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
 953   "lxvw4x %x0,%y1"
 954   [(set_attr "type" "vecload")])
 955
 956 (define_insn "vsx_ld_elemrev_v8hi"
 957   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 958         (vec_select:V8HI
 959           (match_operand:V8HI 1 "memory_operand" "Z")
 960           (parallel [(const_int 7) (const_int 6)
 961                      (const_int 5) (const_int 4)
 962                      (const_int 3) (const_int 2)
 963                      (const_int 1) (const_int 0)])))]
 964   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
 965   "lxvh8x %x0,%y1"
 966   [(set_attr "type" "vecload")])
 967
 968 (define_insn "vsx_ld_elemrev_v16qi"
 969   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 970         (vec_select:V16QI
 971           (match_operand:V16QI 1 "memory_operand" "Z")
 972           (parallel [(const_int 15) (const_int 14)
 973                      (const_int 13) (const_int 12)
 974                      (const_int 11) (const_int 10)
 975                      (const_int  9) (const_int  8)
 976                      (const_int  7) (const_int  6)
 977                      (const_int  5) (const_int  4)
 978                      (const_int  3) (const_int  2)
 979                      (const_int  1) (const_int  0)])))]
 980   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
 981   "lxvb16x %x0,%y1"
 982   [(set_attr "type" "vecload")])
 983
 984 (define_insn "vsx_st_elemrev_v2df"
 985   [(set (match_operand:V2DF 0 "memory_operand" "=Z")
 986         (vec_select:V2DF
 987           (match_operand:V2DF 1 "vsx_register_operand" "wa")
 988           (parallel [(const_int 1) (const_int 0)])))]
 989   "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
 990   "stxvd2x %x1,%y0"
 991   [(set_attr "type" "vecstore")])
 992
 993 (define_insn "vsx_st_elemrev_v2di"
 994   [(set (match_operand:V2DI 0 "memory_operand" "=Z")
 995         (vec_select:V2DI
 996           (match_operand:V2DI 1 "vsx_register_operand" "wa")
 997           (parallel [(const_int 1) (const_int 0)])))]
 998   "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
 999   "stxvd2x %x1,%y0"
1000   [(set_attr "type" "vecstore")])
1001
1002 (define_insn "vsx_st_elemrev_v4sf"
1003   [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1004         (vec_select:V4SF
1005           (match_operand:V4SF 1 "vsx_register_operand" "wa")
1006           (parallel [(const_int 3) (const_int 2)
1007                      (const_int 1) (const_int 0)])))]
1008   "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1009   "stxvw4x %x1,%y0"
1010   [(set_attr "type" "vecstore")])
1011
1012 (define_insn "vsx_st_elemrev_v4si"
1013   [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1014         (vec_select:V4SI
1015           (match_operand:V4SI 1 "vsx_register_operand" "wa")
1016           (parallel [(const_int 3) (const_int 2)
1017                      (const_int 1) (const_int 0)])))]
1018   "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1019   "stxvw4x %x1,%y0"
1020   [(set_attr "type" "vecstore")])
1021
1022 (define_insn "vsx_st_elemrev_v8hi"
1023   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1024         (vec_select:V8HI
1025           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1026           (parallel [(const_int 7) (const_int 6)
1027                      (const_int 5) (const_int 4)
1028                      (const_int 3) (const_int 2)
1029                      (const_int 1) (const_int 0)])))]
1030   "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1031   "stxvh8x %x1,%y0"
1032   [(set_attr "type" "vecstore")])
1033
1034 (define_insn "vsx_st_elemrev_v16qi"
1035   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1036         (vec_select:V16QI
1037           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1038           (parallel [(const_int 15) (const_int 14)
1039                      (const_int 13) (const_int 12)
1040                      (const_int 11) (const_int 10)
1041                      (const_int  9) (const_int  8)
1042                      (const_int  7) (const_int  6)
1043                      (const_int  5) (const_int  4)
1044                      (const_int  3) (const_int  2)
1045                      (const_int  1) (const_int  0)])))]
1046   "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1047   "stxvb16x %x1,%y0"
1048   [(set_attr "type" "vecstore")])
1049
1050 \f
1051 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1052 ;; instructions are now combined with the insn for the traditional floating
1053 ;; point unit.
1054 (define_insn "*vsx_add<mode>3"
1055   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1056         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1057                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1058   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1059   "xvadd<VSs> %x0,%x1,%x2"
1060   [(set_attr "type" "<VStype_simple>")
1061    (set_attr "fp_type" "<VSfptype_simple>")])
1062
1063 (define_insn "*vsx_sub<mode>3"
1064   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1065         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1066                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1067   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1068   "xvsub<VSs> %x0,%x1,%x2"
1069   [(set_attr "type" "<VStype_simple>")
1070    (set_attr "fp_type" "<VSfptype_simple>")])
1071
1072 (define_insn "*vsx_mul<mode>3"
1073   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1074         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1075                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1076   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1077   "xvmul<VSs> %x0,%x1,%x2"
1078   [(set_attr "type" "<VStype_simple>")
1079    (set_attr "fp_type" "<VSfptype_mul>")])
1080
1081 ; Emulate vector with scalar for vec_mul in V2DImode
1082 (define_insn_and_split "vsx_mul_v2di"
1083   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1084         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1085                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1086                      UNSPEC_VSX_MULSD))]
1087   "VECTOR_MEM_VSX_P (V2DImode)"
1088   "#"
1089   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1090   [(const_int 0)]
1091   "
1092 {
1093   rtx op0 = operands[0];
1094   rtx op1 = operands[1];
1095   rtx op2 = operands[2];
1096   rtx op3 = gen_reg_rtx (DImode);
1097   rtx op4 = gen_reg_rtx (DImode);
1098   rtx op5 = gen_reg_rtx (DImode);
1099   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1100   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1101   emit_insn (gen_muldi3 (op5, op3, op4));
1102   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1103   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1104   emit_insn (gen_muldi3 (op3, op3, op4));
1105   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1106   DONE;
1107 }"
1108   [(set_attr "type" "mul")])
1109
1110 (define_insn "*vsx_div<mode>3"
1111   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1112         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1113                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1114   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1115   "xvdiv<VSs> %x0,%x1,%x2"
1116   [(set_attr "type" "<VStype_div>")
1117    (set_attr "fp_type" "<VSfptype_div>")])
1118
1119 ; Emulate vector with scalar for vec_div in V2DImode
1120 (define_insn_and_split "vsx_div_v2di"
1121   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1122         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1123                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1124                      UNSPEC_VSX_DIVSD))]
1125   "VECTOR_MEM_VSX_P (V2DImode)"
1126   "#"
1127   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1128   [(const_int 0)]
1129   "
1130 {
1131   rtx op0 = operands[0];
1132   rtx op1 = operands[1];
1133   rtx op2 = operands[2];
1134   rtx op3 = gen_reg_rtx (DImode);
1135   rtx op4 = gen_reg_rtx (DImode);
1136   rtx op5 = gen_reg_rtx (DImode);
1137   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1138   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1139   emit_insn (gen_divdi3 (op5, op3, op4));
1140   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1141   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1142   emit_insn (gen_divdi3 (op3, op3, op4));
1143   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1144   DONE;
1145 }"
1146   [(set_attr "type" "div")])
1147
1148 (define_insn_and_split "vsx_udiv_v2di"
1149   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1150         (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1151                       (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1152                      UNSPEC_VSX_DIVUD))]
1153   "VECTOR_MEM_VSX_P (V2DImode)"
1154   "#"
1155   "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1156   [(const_int 0)]
1157   "
1158 {
1159   rtx op0 = operands[0];
1160   rtx op1 = operands[1];
1161   rtx op2 = operands[2];
1162   rtx op3 = gen_reg_rtx (DImode);
1163   rtx op4 = gen_reg_rtx (DImode);
1164   rtx op5 = gen_reg_rtx (DImode);
1165   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1166   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1167   emit_insn (gen_udivdi3 (op5, op3, op4));
1168   emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1169   emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1170   emit_insn (gen_udivdi3 (op3, op3, op4));
1171   emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1172   DONE;
1173 }"
1174   [(set_attr "type" "div")])
1175
1176 ;; *tdiv* instruction returning the FG flag
1177 (define_expand "vsx_tdiv<mode>3_fg"
1178   [(set (match_dup 3)
1179         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1180                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1181                      UNSPEC_VSX_TDIV))
1182    (set (match_operand:SI 0 "gpc_reg_operand" "")
1183         (gt:SI (match_dup 3)
1184                (const_int 0)))]
1185   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1186 {
1187   operands[3] = gen_reg_rtx (CCFPmode);
1188 })
1189
1190 ;; *tdiv* instruction returning the FE flag
1191 (define_expand "vsx_tdiv<mode>3_fe"
1192   [(set (match_dup 3)
1193         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1194                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
1195                      UNSPEC_VSX_TDIV))
1196    (set (match_operand:SI 0 "gpc_reg_operand" "")
1197         (eq:SI (match_dup 3)
1198                (const_int 0)))]
1199   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1200 {
1201   operands[3] = gen_reg_rtx (CCFPmode);
1202 })
1203
1204 (define_insn "*vsx_tdiv<mode>3_internal"
1205   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1206         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1207                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1208                    UNSPEC_VSX_TDIV))]
1209   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1210   "x<VSv>tdiv<VSs> %0,%x1,%x2"
1211   [(set_attr "type" "<VStype_simple>")
1212    (set_attr "fp_type" "<VSfptype_simple>")])
1213
1214 (define_insn "vsx_fre<mode>2"
1215   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1216         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1217                       UNSPEC_FRES))]
1218   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1219   "xvre<VSs> %x0,%x1"
1220   [(set_attr "type" "<VStype_simple>")
1221    (set_attr "fp_type" "<VSfptype_simple>")])
1222
1223 (define_insn "*vsx_neg<mode>2"
1224   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1225         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1226   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1227   "xvneg<VSs> %x0,%x1"
1228   [(set_attr "type" "<VStype_simple>")
1229    (set_attr "fp_type" "<VSfptype_simple>")])
1230
1231 (define_insn "*vsx_abs<mode>2"
1232   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1233         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1234   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1235   "xvabs<VSs> %x0,%x1"
1236   [(set_attr "type" "<VStype_simple>")
1237    (set_attr "fp_type" "<VSfptype_simple>")])
1238
1239 (define_insn "vsx_nabs<mode>2"
1240   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1241         (neg:VSX_F
1242          (abs:VSX_F
1243           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1244   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1245   "xvnabs<VSs> %x0,%x1"
1246   [(set_attr "type" "<VStype_simple>")
1247    (set_attr "fp_type" "<VSfptype_simple>")])
1248
1249 (define_insn "vsx_smax<mode>3"
1250   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1251         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1252                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1253   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1254   "xvmax<VSs> %x0,%x1,%x2"
1255   [(set_attr "type" "<VStype_simple>")
1256    (set_attr "fp_type" "<VSfptype_simple>")])
1257
1258 (define_insn "*vsx_smin<mode>3"
1259   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1260         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1261                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1262   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1263   "xvmin<VSs> %x0,%x1,%x2"
1264   [(set_attr "type" "<VStype_simple>")
1265    (set_attr "fp_type" "<VSfptype_simple>")])
1266
1267 (define_insn "*vsx_sqrt<mode>2"
1268   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1269         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1270   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1271   "xvsqrt<VSs> %x0,%x1"
1272   [(set_attr "type" "<VStype_sqrt>")
1273    (set_attr "fp_type" "<VSfptype_sqrt>")])
1274
1275 (define_insn "*vsx_rsqrte<mode>2"
1276   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1277         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1278                       UNSPEC_RSQRT))]
1279   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1280   "xvrsqrte<VSs> %x0,%x1"
1281   [(set_attr "type" "<VStype_simple>")
1282    (set_attr "fp_type" "<VSfptype_simple>")])
1283
1284 ;; *tsqrt* returning the fg flag
1285 (define_expand "vsx_tsqrt<mode>2_fg"
1286   [(set (match_dup 3)
1287         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1288                      UNSPEC_VSX_TSQRT))
1289    (set (match_operand:SI 0 "gpc_reg_operand" "")
1290         (gt:SI (match_dup 3)
1291                (const_int 0)))]
1292   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1293 {
1294   operands[3] = gen_reg_rtx (CCFPmode);
1295 })
1296
1297 ;; *tsqrt* returning the fe flag
1298 (define_expand "vsx_tsqrt<mode>2_fe"
1299   [(set (match_dup 3)
1300         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1301                      UNSPEC_VSX_TSQRT))
1302    (set (match_operand:SI 0 "gpc_reg_operand" "")
1303         (eq:SI (match_dup 3)
1304                (const_int 0)))]
1305   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1306 {
1307   operands[3] = gen_reg_rtx (CCFPmode);
1308 })
1309
1310 (define_insn "*vsx_tsqrt<mode>2_internal"
1311   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1312         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1313                      UNSPEC_VSX_TSQRT))]
1314   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1315   "x<VSv>tsqrt<VSs> %0,%x1"
1316   [(set_attr "type" "<VStype_simple>")
1317    (set_attr "fp_type" "<VSfptype_simple>")])
1318
1319 ;; Fused vector multiply/add instructions. Support the classical Altivec
1320 ;; versions of fma, which allows the target to be a separate register from the
1321 ;; 3 inputs.  Under VSX, the target must be either the addend or the first
1322 ;; multiply.
1323
1324 (define_insn "*vsx_fmav4sf4"
1325   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1326         (fma:V4SF
1327           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1328           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1329           (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1330   "VECTOR_UNIT_VSX_P (V4SFmode)"
1331   "@
1332    xvmaddasp %x0,%x1,%x2
1333    xvmaddmsp %x0,%x1,%x3
1334    xvmaddasp %x0,%x1,%x2
1335    xvmaddmsp %x0,%x1,%x3
1336    vmaddfp %0,%1,%2,%3"
1337   [(set_attr "type" "vecfloat")])
1338
1339 (define_insn "*vsx_fmav2df4"
1340   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1341         (fma:V2DF
1342           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1343           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1344           (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1345   "VECTOR_UNIT_VSX_P (V2DFmode)"
1346   "@
1347    xvmaddadp %x0,%x1,%x2
1348    xvmaddmdp %x0,%x1,%x3
1349    xvmaddadp %x0,%x1,%x2
1350    xvmaddmdp %x0,%x1,%x3"
1351   [(set_attr "type" "vecdouble")])
1352
1353 (define_insn "*vsx_fms<mode>4"
1354   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1355         (fma:VSX_F
1356           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1357           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1358           (neg:VSX_F
1359             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1360   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1361   "@
1362    xvmsuba<VSs> %x0,%x1,%x2
1363    xvmsubm<VSs> %x0,%x1,%x3
1364    xvmsuba<VSs> %x0,%x1,%x2
1365    xvmsubm<VSs> %x0,%x1,%x3"
1366   [(set_attr "type" "<VStype_mul>")])
1367
1368 (define_insn "*vsx_nfma<mode>4"
1369   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1370         (neg:VSX_F
1371          (fma:VSX_F
1372           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1373           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1374           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1375   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1376   "@
1377    xvnmadda<VSs> %x0,%x1,%x2
1378    xvnmaddm<VSs> %x0,%x1,%x3
1379    xvnmadda<VSs> %x0,%x1,%x2
1380    xvnmaddm<VSs> %x0,%x1,%x3"
1381   [(set_attr "type" "<VStype_mul>")
1382    (set_attr "fp_type" "<VSfptype_mul>")])
1383
1384 (define_insn "*vsx_nfmsv4sf4"
1385   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1386         (neg:V4SF
1387          (fma:V4SF
1388            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1389            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1390            (neg:V4SF
1391              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1392   "VECTOR_UNIT_VSX_P (V4SFmode)"
1393   "@
1394    xvnmsubasp %x0,%x1,%x2
1395    xvnmsubmsp %x0,%x1,%x3
1396    xvnmsubasp %x0,%x1,%x2
1397    xvnmsubmsp %x0,%x1,%x3
1398    vnmsubfp %0,%1,%2,%3"
1399   [(set_attr "type" "vecfloat")])
1400
1401 (define_insn "*vsx_nfmsv2df4"
1402   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1403         (neg:V2DF
1404          (fma:V2DF
1405            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1406            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1407            (neg:V2DF
1408              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1409   "VECTOR_UNIT_VSX_P (V2DFmode)"
1410   "@
1411    xvnmsubadp %x0,%x1,%x2
1412    xvnmsubmdp %x0,%x1,%x3
1413    xvnmsubadp %x0,%x1,%x2
1414    xvnmsubmdp %x0,%x1,%x3"
1415   [(set_attr "type" "vecdouble")])
1416
1417 ;; Vector conditional expressions (no scalar version for these instructions)
1418 (define_insn "vsx_eq<mode>"
1419   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1420         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1421                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1422   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1423   "xvcmpeq<VSs> %x0,%x1,%x2"
1424   [(set_attr "type" "<VStype_simple>")
1425    (set_attr "fp_type" "<VSfptype_simple>")])
1426
1427 (define_insn "vsx_gt<mode>"
1428   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1429         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1430                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1431   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1432   "xvcmpgt<VSs> %x0,%x1,%x2"
1433   [(set_attr "type" "<VStype_simple>")
1434    (set_attr "fp_type" "<VSfptype_simple>")])
1435
1436 (define_insn "*vsx_ge<mode>"
1437   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1438         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1439                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1440   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1441   "xvcmpge<VSs> %x0,%x1,%x2"
1442   [(set_attr "type" "<VStype_simple>")
1443    (set_attr "fp_type" "<VSfptype_simple>")])
1444
1445 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1446 ;; indicate a combined status
1447 (define_insn "*vsx_eq_<mode>_p"
1448   [(set (reg:CC 74)
1449         (unspec:CC
1450          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1451                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1452          UNSPEC_PREDICATE))
1453    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1454         (eq:VSX_F (match_dup 1)
1455                   (match_dup 2)))]
1456   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1457   "xvcmpeq<VSs>. %x0,%x1,%x2"
1458   [(set_attr "type" "<VStype_simple>")])
1459
1460 (define_insn "*vsx_gt_<mode>_p"
1461   [(set (reg:CC 74)
1462         (unspec:CC
1463          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1464                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1465          UNSPEC_PREDICATE))
1466    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1467         (gt:VSX_F (match_dup 1)
1468                   (match_dup 2)))]
1469   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1470   "xvcmpgt<VSs>. %x0,%x1,%x2"
1471   [(set_attr "type" "<VStype_simple>")])
1472
1473 (define_insn "*vsx_ge_<mode>_p"
1474   [(set (reg:CC 74)
1475         (unspec:CC
1476          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1477                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1478          UNSPEC_PREDICATE))
1479    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1480         (ge:VSX_F (match_dup 1)
1481                   (match_dup 2)))]
1482   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1483   "xvcmpge<VSs>. %x0,%x1,%x2"
1484   [(set_attr "type" "<VStype_simple>")])
1485
1486 ;; Vector select
1487 (define_insn "*vsx_xxsel<mode>"
1488   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1489         (if_then_else:VSX_L
1490          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1491                 (match_operand:VSX_L 4 "zero_constant" ""))
1492          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1493          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1494   "VECTOR_MEM_VSX_P (<MODE>mode)"
1495   "xxsel %x0,%x3,%x2,%x1"
1496   [(set_attr "type" "vecperm")])
1497
1498 (define_insn "*vsx_xxsel<mode>_uns"
1499   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1500         (if_then_else:VSX_L
1501          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1502                    (match_operand:VSX_L 4 "zero_constant" ""))
1503          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1504          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1505   "VECTOR_MEM_VSX_P (<MODE>mode)"
1506   "xxsel %x0,%x3,%x2,%x1"
1507   [(set_attr "type" "vecperm")])
1508
1509 ;; Copy sign
1510 (define_insn "vsx_copysign<mode>3"
1511   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1512         (unspec:VSX_F
1513          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1514           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1515          UNSPEC_COPYSIGN))]
1516   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1517   "xvcpsgn<VSs> %x0,%x2,%x1"
1518   [(set_attr "type" "<VStype_simple>")
1519    (set_attr "fp_type" "<VSfptype_simple>")])
1520
1521 ;; For the conversions, limit the register class for the integer value to be
1522 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1523 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1524 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1525 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1526 ;; in allowing virtual registers.
1527 (define_insn "vsx_float<VSi><mode>2"
1528   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1529         (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1530   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1531   "xvcvsx<VSc><VSs> %x0,%x1"
1532   [(set_attr "type" "<VStype_simple>")
1533    (set_attr "fp_type" "<VSfptype_simple>")])
1534
1535 (define_insn "vsx_floatuns<VSi><mode>2"
1536   [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1537         (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1538   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1539   "xvcvux<VSc><VSs> %x0,%x1"
1540   [(set_attr "type" "<VStype_simple>")
1541    (set_attr "fp_type" "<VSfptype_simple>")])
1542
1543 (define_insn "vsx_fix_trunc<mode><VSi>2"
1544   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1545         (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1546   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1547   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1548   [(set_attr "type" "<VStype_simple>")
1549    (set_attr "fp_type" "<VSfptype_simple>")])
1550
1551 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1552   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1553         (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1554   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1555   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1556   [(set_attr "type" "<VStype_simple>")
1557    (set_attr "fp_type" "<VSfptype_simple>")])
1558
1559 ;; Math rounding functions
1560 (define_insn "vsx_x<VSv>r<VSs>i"
1561   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1562         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1563                       UNSPEC_VSX_ROUND_I))]
1564   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1565   "x<VSv>r<VSs>i %x0,%x1"
1566   [(set_attr "type" "<VStype_simple>")
1567    (set_attr "fp_type" "<VSfptype_simple>")])
1568
1569 (define_insn "vsx_x<VSv>r<VSs>ic"
1570   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1571         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1572                       UNSPEC_VSX_ROUND_IC))]
1573   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1574   "x<VSv>r<VSs>ic %x0,%x1"
1575   [(set_attr "type" "<VStype_simple>")
1576    (set_attr "fp_type" "<VSfptype_simple>")])
1577
1578 (define_insn "vsx_btrunc<mode>2"
1579   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1580         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1581   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1582   "xvr<VSs>iz %x0,%x1"
1583   [(set_attr "type" "<VStype_simple>")
1584    (set_attr "fp_type" "<VSfptype_simple>")])
1585
1586 (define_insn "*vsx_b2trunc<mode>2"
1587   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1588         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1589                       UNSPEC_FRIZ))]
1590   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1591   "x<VSv>r<VSs>iz %x0,%x1"
1592   [(set_attr "type" "<VStype_simple>")
1593    (set_attr "fp_type" "<VSfptype_simple>")])
1594
1595 (define_insn "vsx_floor<mode>2"
1596   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1597         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1598                       UNSPEC_FRIM))]
1599   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1600   "xvr<VSs>im %x0,%x1"
1601   [(set_attr "type" "<VStype_simple>")
1602    (set_attr "fp_type" "<VSfptype_simple>")])
1603
1604 (define_insn "vsx_ceil<mode>2"
1605   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1606         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1607                       UNSPEC_FRIP))]
1608   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1609   "xvr<VSs>ip %x0,%x1"
1610   [(set_attr "type" "<VStype_simple>")
1611    (set_attr "fp_type" "<VSfptype_simple>")])
1612
1613 \f
1614 ;; VSX convert to/from double vector
1615
1616 ;; Convert between single and double precision
1617 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1618 ;; scalar single precision instructions internally use the double format.
1619 ;; Prefer the altivec registers, since we likely will need to do a vperm
1620 (define_insn "vsx_<VS_spdp_insn>"
1621   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1622         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1623                               UNSPEC_VSX_CVSPDP))]
1624   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1625   "<VS_spdp_insn> %x0,%x1"
1626   [(set_attr "type" "<VS_spdp_type>")])
1627
1628 ;; xscvspdp, represent the scalar SF type as V4SF
1629 (define_insn "vsx_xscvspdp"
1630   [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1631         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1632                    UNSPEC_VSX_CVSPDP))]
1633   "VECTOR_UNIT_VSX_P (V4SFmode)"
1634   "xscvspdp %x0,%x1"
1635   [(set_attr "type" "fp")])
1636
1637 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1638 ;; format of scalars is actually DF.
1639 (define_insn "vsx_xscvdpsp_scalar"
1640   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1641         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1642                      UNSPEC_VSX_CVSPDP))]
1643   "VECTOR_UNIT_VSX_P (V4SFmode)"
1644   "xscvdpsp %x0,%x1"
1645   [(set_attr "type" "fp")])
1646
1647 ;; Same as vsx_xscvspdp, but use SF as the type
1648 (define_insn "vsx_xscvspdp_scalar2"
1649   [(set (match_operand:SF 0 "vsx_register_operand" "=f")
1650         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1651                    UNSPEC_VSX_CVSPDP))]
1652   "VECTOR_UNIT_VSX_P (V4SFmode)"
1653   "xscvspdp %x0,%x1"
1654   [(set_attr "type" "fp")])
1655
1656 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1657 (define_insn "vsx_xscvdpspn"
1658   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
1659         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1660                      UNSPEC_VSX_CVDPSPN))]
1661   "TARGET_XSCVDPSPN"
1662   "xscvdpspn %x0,%x1"
1663   [(set_attr "type" "fp")])
1664
1665 (define_insn "vsx_xscvspdpn"
1666   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
1667         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
1668                    UNSPEC_VSX_CVSPDPN))]
1669   "TARGET_XSCVSPDPN"
1670   "xscvspdpn %x0,%x1"
1671   [(set_attr "type" "fp")])
1672
1673 (define_insn "vsx_xscvdpspn_scalar"
1674   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
1675         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
1676                      UNSPEC_VSX_CVDPSPN))]
1677   "TARGET_XSCVDPSPN"
1678   "xscvdpspn %x0,%x1"
1679   [(set_attr "type" "fp")])
1680
1681 ;; Used by direct move to move a SFmode value from GPR to VSX register
1682 (define_insn "vsx_xscvspdpn_directmove"
1683   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1684         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1685                    UNSPEC_VSX_CVSPDPN))]
1686   "TARGET_XSCVSPDPN"
1687   "xscvspdpn %x0,%x1"
1688   [(set_attr "type" "fp")])
1689
1690 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
1691
1692 (define_expand "vsx_xvcvsxddp_scale"
1693   [(match_operand:V2DF 0 "vsx_register_operand" "")
1694    (match_operand:V2DI 1 "vsx_register_operand" "")
1695    (match_operand:QI 2 "immediate_operand" "")]
1696   "VECTOR_UNIT_VSX_P (V2DFmode)"
1697 {
1698   rtx op0 = operands[0];
1699   rtx op1 = operands[1];
1700   int scale = INTVAL(operands[2]);
1701   emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1702   if (scale != 0)
1703     rs6000_scale_v2df (op0, op0, -scale);
1704   DONE;
1705 })
1706
1707 (define_insn "vsx_xvcvsxddp"
1708   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1709         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1710                      UNSPEC_VSX_XVCVSXDDP))]
1711   "VECTOR_UNIT_VSX_P (V2DFmode)"
1712   "xvcvsxddp %x0,%x1"
1713   [(set_attr "type" "vecdouble")])
1714
1715 (define_expand "vsx_xvcvuxddp_scale"
1716   [(match_operand:V2DF 0 "vsx_register_operand" "")
1717    (match_operand:V2DI 1 "vsx_register_operand" "")
1718    (match_operand:QI 2 "immediate_operand" "")]
1719   "VECTOR_UNIT_VSX_P (V2DFmode)"
1720 {
1721   rtx op0 = operands[0];
1722   rtx op1 = operands[1];
1723   int scale = INTVAL(operands[2]);
1724   emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1725   if (scale != 0)
1726     rs6000_scale_v2df (op0, op0, -scale);
1727   DONE;
1728 })
1729
1730 (define_insn "vsx_xvcvuxddp"
1731   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1732         (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1733                      UNSPEC_VSX_XVCVUXDDP))]
1734   "VECTOR_UNIT_VSX_P (V2DFmode)"
1735   "xvcvuxddp %x0,%x1"
1736   [(set_attr "type" "vecdouble")])
1737
1738 (define_expand "vsx_xvcvdpsxds_scale"
1739   [(match_operand:V2DI 0 "vsx_register_operand" "")
1740    (match_operand:V2DF 1 "vsx_register_operand" "")
1741    (match_operand:QI 2 "immediate_operand" "")]
1742   "VECTOR_UNIT_VSX_P (V2DFmode)"
1743 {
1744   rtx op0 = operands[0];
1745   rtx op1 = operands[1];
1746   rtx tmp;
1747   int scale = INTVAL (operands[2]);
1748   if (scale == 0)
1749     tmp = op1;
1750   else
1751     {
1752       tmp  = gen_reg_rtx (V2DFmode);
1753       rs6000_scale_v2df (tmp, op1, scale);
1754     }
1755   emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
1756   DONE;
1757 })
1758
1759 (define_insn "vsx_xvcvdpsxds"
1760   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1761         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1762                      UNSPEC_VSX_XVCVDPSXDS))]
1763   "VECTOR_UNIT_VSX_P (V2DFmode)"
1764   "xvcvdpsxds %x0,%x1"
1765   [(set_attr "type" "vecdouble")])
1766
1767 (define_expand "vsx_xvcvdpuxds_scale"
1768   [(match_operand:V2DI 0 "vsx_register_operand" "")
1769    (match_operand:V2DF 1 "vsx_register_operand" "")
1770    (match_operand:QI 2 "immediate_operand" "")]
1771   "VECTOR_UNIT_VSX_P (V2DFmode)"
1772 {
1773   rtx op0 = operands[0];
1774   rtx op1 = operands[1];
1775   rtx tmp;
1776   int scale = INTVAL (operands[2]);
1777   if (scale == 0)
1778     tmp = op1;
1779   else
1780     {
1781       tmp = gen_reg_rtx (V2DFmode);
1782       rs6000_scale_v2df (tmp, op1, scale);
1783     }
1784   emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
1785   DONE;
1786 })
1787
1788 (define_insn "vsx_xvcvdpuxds"
1789   [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1790         (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1791                      UNSPEC_VSX_XVCVDPUXDS))]
1792   "VECTOR_UNIT_VSX_P (V2DFmode)"
1793   "xvcvdpuxds %x0,%x1"
1794   [(set_attr "type" "vecdouble")])
1795
1796 ;; Convert from 64-bit to 32-bit types
1797 ;; Note, favor the Altivec registers since the usual use of these instructions
1798 ;; is in vector converts and we need to use the Altivec vperm instruction.
1799
1800 (define_insn "vsx_xvcvdpsxws"
1801   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1802         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1803                      UNSPEC_VSX_CVDPSXWS))]
1804   "VECTOR_UNIT_VSX_P (V2DFmode)"
1805   "xvcvdpsxws %x0,%x1"
1806   [(set_attr "type" "vecdouble")])
1807
1808 (define_insn "vsx_xvcvdpuxws"
1809   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1810         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1811                      UNSPEC_VSX_CVDPUXWS))]
1812   "VECTOR_UNIT_VSX_P (V2DFmode)"
1813   "xvcvdpuxws %x0,%x1"
1814   [(set_attr "type" "vecdouble")])
1815
1816 (define_insn "vsx_xvcvsxdsp"
1817   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1818         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1819                      UNSPEC_VSX_CVSXDSP))]
1820   "VECTOR_UNIT_VSX_P (V2DFmode)"
1821   "xvcvsxdsp %x0,%x1"
1822   [(set_attr "type" "vecfloat")])
1823
1824 (define_insn "vsx_xvcvuxdsp"
1825   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1826         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1827                      UNSPEC_VSX_CVUXDSP))]
1828   "VECTOR_UNIT_VSX_P (V2DFmode)"
1829   "xvcvuxwdp %x0,%x1"
1830   [(set_attr "type" "vecdouble")])
1831
1832 ;; Convert from 32-bit to 64-bit types
1833 (define_insn "vsx_xvcvsxwdp"
1834   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1835         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1836                      UNSPEC_VSX_CVSXWDP))]
1837   "VECTOR_UNIT_VSX_P (V2DFmode)"
1838   "xvcvsxwdp %x0,%x1"
1839   [(set_attr "type" "vecdouble")])
1840
1841 (define_insn "vsx_xvcvuxwdp"
1842   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1843         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1844                      UNSPEC_VSX_CVUXWDP))]
1845   "VECTOR_UNIT_VSX_P (V2DFmode)"
1846   "xvcvuxwdp %x0,%x1"
1847   [(set_attr "type" "vecdouble")])
1848
1849 (define_insn "vsx_xvcvspsxds"
1850   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1851         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1852                      UNSPEC_VSX_CVSPSXDS))]
1853   "VECTOR_UNIT_VSX_P (V2DFmode)"
1854   "xvcvspsxds %x0,%x1"
1855   [(set_attr "type" "vecdouble")])
1856
1857 (define_insn "vsx_xvcvspuxds"
1858   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1859         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1860                      UNSPEC_VSX_CVSPUXDS))]
1861   "VECTOR_UNIT_VSX_P (V2DFmode)"
1862   "xvcvspuxds %x0,%x1"
1863   [(set_attr "type" "vecdouble")])
1864
1865 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
1866 ;; since the xvrdpiz instruction does not truncate the value if the floating
1867 ;; point value is < LONG_MIN or > LONG_MAX.
1868 (define_insn "*vsx_float_fix_v2df2"
1869   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1870         (float:V2DF
1871          (fix:V2DI
1872           (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
1873   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1874    && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
1875    && !flag_trapping_math && TARGET_FRIZ"
1876   "xvrdpiz %x0,%x1"
1877   [(set_attr "type" "vecdouble")
1878    (set_attr "fp_type" "fp_addsub_d")])
1879
1880 \f
1881 ;; Permute operations
1882
1883 ;; Build a V2DF/V2DI vector from two scalars
1884 (define_insn "vsx_concat_<mode>"
1885   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1886         (vec_concat:VSX_D
1887          (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
1888          (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
1889   "VECTOR_MEM_VSX_P (<MODE>mode)"
1890 {
1891   if (BYTES_BIG_ENDIAN)
1892     return "xxpermdi %x0,%x1,%x2,0";
1893   else
1894     return "xxpermdi %x0,%x2,%x1,0";
1895 }
1896   [(set_attr "type" "vecperm")])
1897
1898 ;; Special purpose concat using xxpermdi to glue two single precision values
1899 ;; together, relying on the fact that internally scalar floats are represented
1900 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
1901 (define_insn "vsx_concat_v2sf"
1902   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1903         (unspec:V2DF
1904          [(match_operand:SF 1 "vsx_register_operand" "f,f")
1905           (match_operand:SF 2 "vsx_register_operand" "f,f")]
1906          UNSPEC_VSX_CONCAT))]
1907   "VECTOR_MEM_VSX_P (V2DFmode)"
1908 {
1909   if (BYTES_BIG_ENDIAN)
1910     return "xxpermdi %x0,%x1,%x2,0";
1911   else
1912     return "xxpermdi %x0,%x2,%x1,0";
1913 }
1914   [(set_attr "type" "vecperm")])
1915
1916 ;; xxpermdi for little endian loads and stores.  We need several of
1917 ;; these since the form of the PARALLEL differs by mode.
1918 (define_insn "*vsx_xxpermdi2_le_<mode>"
1919   [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1920         (vec_select:VSX_LE
1921           (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1922           (parallel [(const_int 1) (const_int 0)])))]
1923   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1924   "xxpermdi %x0,%x1,%x1,2"
1925   [(set_attr "type" "vecperm")])
1926
1927 (define_insn "*vsx_xxpermdi4_le_<mode>"
1928   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1929         (vec_select:VSX_W
1930           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1931           (parallel [(const_int 2) (const_int 3)
1932                      (const_int 0) (const_int 1)])))]
1933   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1934   "xxpermdi %x0,%x1,%x1,2"
1935   [(set_attr "type" "vecperm")])
1936
1937 (define_insn "*vsx_xxpermdi8_le_V8HI"
1938   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1939         (vec_select:V8HI
1940           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1941           (parallel [(const_int 4) (const_int 5)
1942                      (const_int 6) (const_int 7)
1943                      (const_int 0) (const_int 1)
1944                      (const_int 2) (const_int 3)])))]
1945   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1946   "xxpermdi %x0,%x1,%x1,2"
1947   [(set_attr "type" "vecperm")])
1948
1949 (define_insn "*vsx_xxpermdi16_le_V16QI"
1950   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1951         (vec_select:V16QI
1952           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1953           (parallel [(const_int 8) (const_int 9)
1954                      (const_int 10) (const_int 11)
1955                      (const_int 12) (const_int 13)
1956                      (const_int 14) (const_int 15)
1957                      (const_int 0) (const_int 1)
1958                      (const_int 2) (const_int 3)
1959                      (const_int 4) (const_int 5)
1960                      (const_int 6) (const_int 7)])))]
1961   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1962   "xxpermdi %x0,%x1,%x1,2"
1963   [(set_attr "type" "vecperm")])
1964
1965 ;; lxvd2x for little endian loads.  We need several of
1966 ;; these since the form of the PARALLEL differs by mode.
1967 (define_insn "*vsx_lxvd2x2_le_<mode>"
1968   [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1969         (vec_select:VSX_LE
1970           (match_operand:VSX_LE 1 "memory_operand" "Z")
1971           (parallel [(const_int 1) (const_int 0)])))]
1972   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1973   "lxvd2x %x0,%y1"
1974   [(set_attr "type" "vecload")])
1975
1976 (define_insn "*vsx_lxvd2x4_le_<mode>"
1977   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1978         (vec_select:VSX_W
1979           (match_operand:VSX_W 1 "memory_operand" "Z")
1980           (parallel [(const_int 2) (const_int 3)
1981                      (const_int 0) (const_int 1)])))]
1982   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1983   "lxvd2x %x0,%y1"
1984   [(set_attr "type" "vecload")])
1985
1986 (define_insn "*vsx_lxvd2x8_le_V8HI"
1987   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1988         (vec_select:V8HI
1989           (match_operand:V8HI 1 "memory_operand" "Z")
1990           (parallel [(const_int 4) (const_int 5)
1991                      (const_int 6) (const_int 7)
1992                      (const_int 0) (const_int 1)
1993                      (const_int 2) (const_int 3)])))]
1994   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
1995   "lxvd2x %x0,%y1"
1996   [(set_attr "type" "vecload")])
1997
1998 (define_insn "*vsx_lxvd2x16_le_V16QI"
1999   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2000         (vec_select:V16QI
2001           (match_operand:V16QI 1 "memory_operand" "Z")
2002           (parallel [(const_int 8) (const_int 9)
2003                      (const_int 10) (const_int 11)
2004                      (const_int 12) (const_int 13)
2005                      (const_int 14) (const_int 15)
2006                      (const_int 0) (const_int 1)
2007                      (const_int 2) (const_int 3)
2008                      (const_int 4) (const_int 5)
2009                      (const_int 6) (const_int 7)])))]
2010   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2011   "lxvd2x %x0,%y1"
2012   [(set_attr "type" "vecload")])
2013
2014 ;; stxvd2x for little endian stores.  We need several of
2015 ;; these since the form of the PARALLEL differs by mode.
2016 (define_insn "*vsx_stxvd2x2_le_<mode>"
2017   [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
2018         (vec_select:VSX_LE
2019           (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
2020           (parallel [(const_int 1) (const_int 0)])))]
2021   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2022   "stxvd2x %x1,%y0"
2023   [(set_attr "type" "vecstore")])
2024
2025 (define_insn "*vsx_stxvd2x4_le_<mode>"
2026   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
2027         (vec_select:VSX_W
2028           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2029           (parallel [(const_int 2) (const_int 3)
2030                      (const_int 0) (const_int 1)])))]
2031   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2032   "stxvd2x %x1,%y0"
2033   [(set_attr "type" "vecstore")])
2034
2035 (define_insn "*vsx_stxvd2x8_le_V8HI"
2036   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
2037         (vec_select:V8HI
2038           (match_operand:V8HI 1 "vsx_register_operand" "wa")
2039           (parallel [(const_int 4) (const_int 5)
2040                      (const_int 6) (const_int 7)
2041                      (const_int 0) (const_int 1)
2042                      (const_int 2) (const_int 3)])))]
2043   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2044   "stxvd2x %x1,%y0"
2045   [(set_attr "type" "vecstore")])
2046
2047 (define_insn "*vsx_stxvd2x16_le_V16QI"
2048   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
2049         (vec_select:V16QI
2050           (match_operand:V16QI 1 "vsx_register_operand" "wa")
2051           (parallel [(const_int 8) (const_int 9)
2052                      (const_int 10) (const_int 11)
2053                      (const_int 12) (const_int 13)
2054                      (const_int 14) (const_int 15)
2055                      (const_int 0) (const_int 1)
2056                      (const_int 2) (const_int 3)
2057                      (const_int 4) (const_int 5)
2058                      (const_int 6) (const_int 7)])))]
2059   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2060   "stxvd2x %x1,%y0"
2061   [(set_attr "type" "vecstore")])
2062
2063 ;; Convert a TImode value into V1TImode
2064 (define_expand "vsx_set_v1ti"
2065   [(match_operand:V1TI 0 "nonimmediate_operand" "")
2066    (match_operand:V1TI 1 "nonimmediate_operand" "")
2067    (match_operand:TI 2 "input_operand" "")
2068    (match_operand:QI 3 "u5bit_cint_operand" "")]
2069   "VECTOR_MEM_VSX_P (V1TImode)"
2070 {
2071   if (operands[3] != const0_rtx)
2072     gcc_unreachable ();
2073
2074   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
2075   DONE;
2076 })
2077
2078 ;; Set the element of a V2DI/VD2F mode
2079 (define_insn "vsx_set_<mode>"
2080   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
2081         (unspec:VSX_D
2082          [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
2083           (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
2084           (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
2085          UNSPEC_VSX_SET))]
2086   "VECTOR_MEM_VSX_P (<MODE>mode)"
2087 {
2088   int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
2089   if (INTVAL (operands[3]) == idx_first)
2090     return \"xxpermdi %x0,%x2,%x1,1\";
2091   else if (INTVAL (operands[3]) == 1 - idx_first)
2092     return \"xxpermdi %x0,%x1,%x2,0\";
2093   else
2094     gcc_unreachable ();
2095 }
2096   [(set_attr "type" "vecperm")])
2097
2098 ;; Extract a DF/DI element from V2DF/V2DI
2099 ;; Optimize cases were we can do a simple or direct move.
2100 ;; Or see if we can avoid doing the move at all
2101
2102 ;; There are some unresolved problems with reload that show up if an Altivec
2103 ;; register was picked.  Limit the scalar value to FPRs for now.
2104
2105 (define_insn "vsx_extract_<mode>"
2106   [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand"
2107             "=d,     wm,      wo,    d")
2108
2109         (vec_select:<VS_scalar>
2110          (match_operand:VSX_D 1 "gpc_reg_operand"
2111             "<VSa>, <VSa>,  <VSa>,  <VSa>")
2112
2113          (parallel
2114           [(match_operand:QI 2 "const_0_to_1_operand"
2115             "wD,    wD,     wL,     n")])))]
2116   "VECTOR_MEM_VSX_P (<MODE>mode)"
2117 {
2118   int element = INTVAL (operands[2]);
2119   int op0_regno = REGNO (operands[0]);
2120   int op1_regno = REGNO (operands[1]);
2121   int fldDM;
2122
2123   gcc_assert (IN_RANGE (element, 0, 1));
2124   gcc_assert (VSX_REGNO_P (op1_regno));
2125
2126   if (element == VECTOR_ELEMENT_SCALAR_64BIT)
2127     {
2128       if (op0_regno == op1_regno)
2129         return ASM_COMMENT_START " vec_extract to same register";
2130
2131       else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
2132                && TARGET_POWERPC64)
2133         return "mfvsrd %0,%x1";
2134
2135       else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
2136         return "fmr %0,%1";
2137
2138       else if (VSX_REGNO_P (op0_regno))
2139         return "xxlor %x0,%x1,%x1";
2140
2141       else
2142         gcc_unreachable ();
2143     }
2144
2145   else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
2146            && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
2147     return "mfvsrdl %0,%x1";
2148
2149   else if (VSX_REGNO_P (op0_regno))
2150     {
2151       fldDM = element << 1;
2152       if (!BYTES_BIG_ENDIAN)
2153         fldDM = 3 - fldDM;
2154       operands[3] = GEN_INT (fldDM);
2155       return "xxpermdi %x0,%x1,%x1,%3";
2156     }
2157
2158   else
2159     gcc_unreachable ();
2160 }
2161   [(set_attr "type" "vecsimple,mftgpr,mftgpr,vecperm")])
2162
2163 ;; Optimize extracting a single scalar element from memory if the scalar is in
2164 ;; the correct location to use a single load.
2165 (define_insn "*vsx_extract_<mode>_load"
2166   [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
2167         (vec_select:<VS_scalar>
2168          (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
2169          (parallel [(const_int 0)])))]
2170   "VECTOR_MEM_VSX_P (<MODE>mode)"
2171   "@
2172    lfd%U1%X1 %0,%1
2173    lxsd%U1x %x0,%y1
2174    ld%U1%X1 %0,%1"
2175   [(set_attr "type" "fpload,fpload,load")
2176    (set_attr "length" "4")])
2177
2178 ;; Optimize storing a single scalar element that is the right location to
2179 ;; memory
2180 (define_insn "*vsx_extract_<mode>_store"
2181   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
2182         (vec_select:<VS_scalar>
2183          (match_operand:VSX_D 1 "register_operand" "d,wd,<VSa>")
2184          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2185   "VECTOR_MEM_VSX_P (<MODE>mode)"
2186   "@
2187    stfd%U0%X0 %1,%0
2188    stxsd%U0x %x1,%y0
2189    stxsd%U0x %x1,%y0"
2190   [(set_attr "type" "fpstore")
2191    (set_attr "length" "4")])
2192
2193 ;; Extract a SF element from V4SF
2194 (define_insn_and_split "vsx_extract_v4sf"
2195   [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
2196         (vec_select:SF
2197          (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2198          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
2199    (clobber (match_scratch:V4SF 3 "=X,0"))]
2200   "VECTOR_UNIT_VSX_P (V4SFmode)"
2201   "@
2202    xscvspdp %x0,%x1
2203    #"
2204   ""
2205   [(const_int 0)]
2206   "
2207 {
2208   rtx op0 = operands[0];
2209   rtx op1 = operands[1];
2210   rtx op2 = operands[2];
2211   rtx op3 = operands[3];
2212   rtx tmp;
2213   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
2214
2215   if (ele == 0)
2216     tmp = op1;
2217   else
2218     {
2219       if (GET_CODE (op3) == SCRATCH)
2220         op3 = gen_reg_rtx (V4SFmode);
2221       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
2222       tmp = op3;
2223     }
2224   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
2225   DONE;
2226 }"
2227   [(set_attr "length" "4,8")
2228    (set_attr "type" "fp")])
2229
2230 ;; Expand the builtin form of xxpermdi to canonical rtl.
2231 (define_expand "vsx_xxpermdi_<mode>"
2232   [(match_operand:VSX_L 0 "vsx_register_operand" "")
2233    (match_operand:VSX_L 1 "vsx_register_operand" "")
2234    (match_operand:VSX_L 2 "vsx_register_operand" "")
2235    (match_operand:QI 3 "u5bit_cint_operand" "")]
2236   "VECTOR_MEM_VSX_P (<MODE>mode)"
2237 {
2238   rtx target = operands[0];
2239   rtx op0 = operands[1];
2240   rtx op1 = operands[2];
2241   int mask = INTVAL (operands[3]);
2242   rtx perm0 = GEN_INT ((mask >> 1) & 1);
2243   rtx perm1 = GEN_INT ((mask & 1) + 2);
2244   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2245
2246   if (<MODE>mode == V2DFmode)
2247     gen = gen_vsx_xxpermdi2_v2df_1;
2248   else
2249     {
2250       gen = gen_vsx_xxpermdi2_v2di_1;
2251       if (<MODE>mode != V2DImode)
2252         {
2253           target = gen_lowpart (V2DImode, target);
2254           op0 = gen_lowpart (V2DImode, op0);
2255           op1 = gen_lowpart (V2DImode, op1);
2256         }
2257     }
2258   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2259      transformation we don't want; it is necessary for
2260      rs6000_expand_vec_perm_const_1 but not for this use.  So we
2261      prepare for that by reversing the transformation here.  */
2262   if (BYTES_BIG_ENDIAN)
2263     emit_insn (gen (target, op0, op1, perm0, perm1));
2264   else
2265     {
2266       rtx p0 = GEN_INT (3 - INTVAL (perm1));
2267       rtx p1 = GEN_INT (3 - INTVAL (perm0));
2268       emit_insn (gen (target, op1, op0, p0, p1));
2269     }
2270   DONE;
2271 })
2272
2273 (define_insn "vsx_xxpermdi2_<mode>_1"
2274   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
2275         (vec_select:VSX_D
2276           (vec_concat:<VS_double>
2277             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
2278             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
2279           (parallel [(match_operand 3 "const_0_to_1_operand" "")
2280                      (match_operand 4 "const_2_to_3_operand" "")])))]
2281   "VECTOR_MEM_VSX_P (<MODE>mode)"
2282 {
2283   int op3, op4, mask;
2284
2285   /* For little endian, swap operands and invert/swap selectors
2286      to get the correct xxpermdi.  The operand swap sets up the
2287      inputs as a little endian array.  The selectors are swapped
2288      because they are defined to use big endian ordering.  The
2289      selectors are inverted to get the correct doublewords for
2290      little endian ordering.  */
2291   if (BYTES_BIG_ENDIAN)
2292     {
2293       op3 = INTVAL (operands[3]);
2294       op4 = INTVAL (operands[4]);
2295     }
2296   else
2297     {
2298       op3 = 3 - INTVAL (operands[4]);
2299       op4 = 3 - INTVAL (operands[3]);
2300     }
2301
2302   mask = (op3 << 1) | (op4 - 2);
2303   operands[3] = GEN_INT (mask);
2304
2305   if (BYTES_BIG_ENDIAN)
2306     return "xxpermdi %x0,%x1,%x2,%3";
2307   else
2308     return "xxpermdi %x0,%x2,%x1,%3";
2309 }
2310   [(set_attr "type" "vecperm")])
2311
2312 (define_expand "vec_perm_const<mode>"
2313   [(match_operand:VSX_D 0 "vsx_register_operand" "")
2314    (match_operand:VSX_D 1 "vsx_register_operand" "")
2315    (match_operand:VSX_D 2 "vsx_register_operand" "")
2316    (match_operand:V2DI  3 "" "")]
2317   "VECTOR_MEM_VSX_P (<MODE>mode)"
2318 {
2319   if (rs6000_expand_vec_perm_const (operands))
2320     DONE;
2321   else
2322     FAIL;
2323 })
2324
2325 ;; Expanders for builtins
2326 (define_expand "vsx_mergel_<mode>"
2327   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2328    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2329    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2330   "VECTOR_MEM_VSX_P (<MODE>mode)"
2331 {
2332   rtvec v;
2333   rtx x;
2334
2335   /* Special handling for LE with -maltivec=be.  */
2336   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2337     {
2338       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2339       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2340     }
2341   else
2342     {
2343       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2344       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2345     }
2346
2347   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2348   emit_insn (gen_rtx_SET (operands[0], x));
2349   DONE;
2350 })
2351
2352 (define_expand "vsx_mergeh_<mode>"
2353   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2354    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2355    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2356   "VECTOR_MEM_VSX_P (<MODE>mode)"
2357 {
2358   rtvec v;
2359   rtx x;
2360
2361   /* Special handling for LE with -maltivec=be.  */
2362   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2363     {
2364       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2365       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2366     }
2367   else
2368     {
2369       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2370       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2371     }
2372
2373   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2374   emit_insn (gen_rtx_SET (operands[0], x));
2375   DONE;
2376 })
2377
2378 ;; V2DF/V2DI splat
2379 (define_insn "vsx_splat_<mode>"
2380   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>,<VSa>,we")
2381         (vec_duplicate:VSX_D
2382          (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,Z,b")))]
2383   "VECTOR_MEM_VSX_P (<MODE>mode)"
2384   "@
2385    xxpermdi %x0,%x1,%x1,0
2386    lxvdsx %x0,%y1
2387    mtvsrdd %x0,%1,%1"
2388   [(set_attr "type" "vecperm,vecload,mftgpr")])
2389
2390 ;; V4SI splat (ISA 3.0)
2391 ;; When SI's are allowed in VSX registers, add XXSPLTW support
2392 (define_expand "vsx_splat_<mode>"
2393   [(set (match_operand:VSX_W 0 "vsx_register_operand" "")
2394         (vec_duplicate:VSX_W
2395          (match_operand:<VS_scalar> 1 "splat_input_operand" "")))]
2396   "TARGET_P9_VECTOR"
2397 {
2398   if (MEM_P (operands[1]))
2399     operands[1] = rs6000_address_for_fpconvert (operands[1]);
2400   else if (!REG_P (operands[1]))
2401     operands[1] = force_reg (<VS_scalar>mode, operands[1]);
2402 })
2403
2404 (define_insn "*vsx_splat_v4si_internal"
2405   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
2406         (vec_duplicate:V4SI
2407          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
2408   "TARGET_P9_VECTOR"
2409   "@
2410    mtvsrws %x0,%1
2411    lxvwsx %x0,%y1"
2412   [(set_attr "type" "mftgpr,vecload")])
2413
2414 ;; V4SF splat (ISA 3.0)
2415 (define_insn_and_split "*vsx_splat_v4sf_internal"
2416   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
2417         (vec_duplicate:V4SF
2418          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
2419   "TARGET_P9_VECTOR"
2420   "@
2421    lxvwsx %x0,%y1
2422    #
2423    mtvsrws %x0,%1"
2424   "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
2425   [(set (match_dup 0)
2426         (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
2427    (set (match_dup 0)
2428         (vec_duplicate:V4SF
2429          (vec_select:SF (match_dup 0)
2430                         (parallel [(const_int 0)]))))]
2431   ""
2432   [(set_attr "type" "vecload,vecperm,mftgpr")
2433    (set_attr "length" "4,8,4")])
2434
2435 ;; V4SF/V4SI splat from a vector element
2436 (define_insn "vsx_xxspltw_<mode>"
2437   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2438         (vec_duplicate:VSX_W
2439          (vec_select:<VS_scalar>
2440           (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2441           (parallel
2442            [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
2443   "VECTOR_MEM_VSX_P (<MODE>mode)"
2444 {
2445   if (!BYTES_BIG_ENDIAN)
2446     operands[2] = GEN_INT (3 - INTVAL (operands[2]));
2447
2448   return "xxspltw %x0,%x1,%2";
2449 }
2450   [(set_attr "type" "vecperm")])
2451
2452 (define_insn "vsx_xxspltw_<mode>_direct"
2453   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2454         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2455                        (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
2456                       UNSPEC_VSX_XXSPLTW))]
2457   "VECTOR_MEM_VSX_P (<MODE>mode)"
2458   "xxspltw %x0,%x1,%2"
2459   [(set_attr "type" "vecperm")])
2460
2461 ;; V2DF/V2DI splat for use by vec_splat builtin
2462 (define_insn "vsx_xxspltd_<mode>"
2463   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2464         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
2465                        (match_operand:QI 2 "u5bit_cint_operand" "i")]
2466                       UNSPEC_VSX_XXSPLTD))]
2467   "VECTOR_MEM_VSX_P (<MODE>mode)"
2468 {
2469   if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
2470       || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
2471     return "xxpermdi %x0,%x1,%x1,0";
2472   else
2473     return "xxpermdi %x0,%x1,%x1,3";
2474 }
2475   [(set_attr "type" "vecperm")])
2476
2477 ;; V4SF/V4SI interleave
2478 (define_insn "vsx_xxmrghw_<mode>"
2479   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2480         (vec_select:VSX_W
2481           (vec_concat:<VS_double>
2482             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2483             (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
2484           (parallel [(const_int 0) (const_int 4)
2485                      (const_int 1) (const_int 5)])))]
2486   "VECTOR_MEM_VSX_P (<MODE>mode)"
2487 {
2488   if (BYTES_BIG_ENDIAN)
2489     return "xxmrghw %x0,%x1,%x2";
2490   else
2491     return "xxmrglw %x0,%x2,%x1";
2492 }
2493   [(set_attr "type" "vecperm")])
2494
2495 (define_insn "vsx_xxmrglw_<mode>"
2496   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2497         (vec_select:VSX_W
2498           (vec_concat:<VS_double>
2499             (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2500             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
2501           (parallel [(const_int 2) (const_int 6)
2502                      (const_int 3) (const_int 7)])))]
2503   "VECTOR_MEM_VSX_P (<MODE>mode)"
2504 {
2505   if (BYTES_BIG_ENDIAN)
2506     return "xxmrglw %x0,%x1,%x2";
2507   else
2508     return "xxmrghw %x0,%x2,%x1";
2509 }
2510   [(set_attr "type" "vecperm")])
2511
2512 ;; Shift left double by word immediate
2513 (define_insn "vsx_xxsldwi_<mode>"
2514   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
2515         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
2516                        (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
2517                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
2518                       UNSPEC_VSX_SLDWI))]
2519   "VECTOR_MEM_VSX_P (<MODE>mode)"
2520   "xxsldwi %x0,%x1,%x2,%3"
2521   [(set_attr "type" "vecperm")])
2522
2523 \f
2524 ;; Vector reduction insns and splitters
2525
2526 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
2527   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
2528         (VEC_reduc:V2DF
2529          (vec_concat:V2DF
2530           (vec_select:DF
2531            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2532            (parallel [(const_int 1)]))
2533           (vec_select:DF
2534            (match_dup 1)
2535            (parallel [(const_int 0)])))
2536          (match_dup 1)))
2537    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
2538   "VECTOR_UNIT_VSX_P (V2DFmode)"
2539   "#"
2540   ""
2541   [(const_int 0)]
2542   "
2543 {
2544   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
2545              ? gen_reg_rtx (V2DFmode)
2546              : operands[2];
2547   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
2548   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
2549   DONE;
2550 }"
2551   [(set_attr "length" "8")
2552    (set_attr "type" "veccomplex")])
2553
2554 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
2555   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
2556         (VEC_reduc:V4SF
2557          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2558          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
2559    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2560    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
2561   "VECTOR_UNIT_VSX_P (V4SFmode)"
2562   "#"
2563   ""
2564   [(const_int 0)]
2565   "
2566 {
2567   rtx op0 = operands[0];
2568   rtx op1 = operands[1];
2569   rtx tmp2, tmp3, tmp4;
2570
2571   if (can_create_pseudo_p ())
2572     {
2573       tmp2 = gen_reg_rtx (V4SFmode);
2574       tmp3 = gen_reg_rtx (V4SFmode);
2575       tmp4 = gen_reg_rtx (V4SFmode);
2576     }
2577   else
2578     {
2579       tmp2 = operands[2];
2580       tmp3 = operands[3];
2581       tmp4 = tmp2;
2582     }
2583
2584   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2585   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2586   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2587   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
2588   DONE;
2589 }"
2590   [(set_attr "length" "16")
2591    (set_attr "type" "veccomplex")])
2592
2593 ;; Combiner patterns with the vector reduction patterns that knows we can get
2594 ;; to the top element of the V2DF array without doing an extract.
2595
2596 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
2597   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
2598         (vec_select:DF
2599          (VEC_reduc:V2DF
2600           (vec_concat:V2DF
2601            (vec_select:DF
2602             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2603             (parallel [(const_int 1)]))
2604            (vec_select:DF
2605             (match_dup 1)
2606             (parallel [(const_int 0)])))
2607           (match_dup 1))
2608          (parallel [(const_int 1)])))
2609    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2610   "VECTOR_UNIT_VSX_P (V2DFmode)"
2611   "#"
2612   ""
2613   [(const_int 0)]
2614   "
2615 {
2616   rtx hi = gen_highpart (DFmode, operands[1]);
2617   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2618             ? gen_reg_rtx (DFmode)
2619             : operands[2];
2620
2621   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2622   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2623   DONE;
2624 }"
2625   [(set_attr "length" "8")
2626    (set_attr "type" "veccomplex")])
2627
2628 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2629   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2630         (vec_select:SF
2631          (VEC_reduc:V4SF
2632           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2633           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2634          (parallel [(const_int 3)])))
2635    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2636    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2637    (clobber (match_scratch:V4SF 4 "=0,0"))]
2638   "VECTOR_UNIT_VSX_P (V4SFmode)"
2639   "#"
2640   ""
2641   [(const_int 0)]
2642   "
2643 {
2644   rtx op0 = operands[0];
2645   rtx op1 = operands[1];
2646   rtx tmp2, tmp3, tmp4, tmp5;
2647
2648   if (can_create_pseudo_p ())
2649     {
2650       tmp2 = gen_reg_rtx (V4SFmode);
2651       tmp3 = gen_reg_rtx (V4SFmode);
2652       tmp4 = gen_reg_rtx (V4SFmode);
2653       tmp5 = gen_reg_rtx (V4SFmode);
2654     }
2655   else
2656     {
2657       tmp2 = operands[2];
2658       tmp3 = operands[3];
2659       tmp4 = tmp2;
2660       tmp5 = operands[4];
2661     }
2662
2663   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2664   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2665   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2666   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2667   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2668   DONE;
2669 }"
2670   [(set_attr "length" "20")
2671    (set_attr "type" "veccomplex")])
2672
2673 \f
2674 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
2675 (define_peephole
2676   [(set (match_operand:P 0 "base_reg_operand" "")
2677         (match_operand:P 1 "short_cint_operand" ""))
2678    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
2679         (mem:VSX_M (plus:P (match_dup 0)
2680                            (match_operand:P 3 "int_reg_operand" ""))))]
2681   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2682   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
2683   [(set_attr "length" "8")
2684    (set_attr "type" "vecload")])
2685
2686 (define_peephole
2687   [(set (match_operand:P 0 "base_reg_operand" "")
2688         (match_operand:P 1 "short_cint_operand" ""))
2689    (set (match_operand:VSX_M 2 "vsx_register_operand" "")
2690         (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
2691                            (match_dup 0))))]
2692   "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2693   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
2694   [(set_attr "length" "8")
2695    (set_attr "type" "vecload")])
2696
2697 \f
2698 ;; ISA 3.0 vector extend sign support
2699
2700 (define_insn "vsx_sign_extend_qi_<mode>"
2701   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
2702         (unspec:VSINT_84
2703          [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2704          UNSPEC_VSX_SIGN_EXTEND))]
2705   "TARGET_P9_VECTOR"
2706   "vextsb2<wd> %0,%1"
2707   [(set_attr "type" "vecsimple")])
2708
2709 (define_insn "vsx_sign_extend_hi_<mode>"
2710   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
2711         (unspec:VSINT_84
2712          [(match_operand:V8HI 1 "vsx_register_operand" "v")]
2713          UNSPEC_VSX_SIGN_EXTEND))]
2714   "TARGET_P9_VECTOR"
2715   "vextsh2<wd> %0,%1"
2716   [(set_attr "type" "vecsimple")])
2717
2718 (define_insn "*vsx_sign_extend_si_v2di"
2719   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
2720         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
2721                      UNSPEC_VSX_SIGN_EXTEND))]
2722   "TARGET_P9_VECTOR"
2723   "vextsw2d %0,%1"
2724   [(set_attr "type" "vecsimple")])
2725
2726 \f
2727 ;; ISA 3.0 memory operations
2728 (define_insn "p9_lxsi<wd>zx"
2729   [(set (match_operand:DI 0 "vsx_register_operand" "=wi")
2730         (unspec:DI [(zero_extend:DI
2731                      (match_operand:QHI 1 "indexed_or_indirect_operand" "Z"))]
2732                    UNSPEC_P9_MEMORY))]
2733   "TARGET_P9_VECTOR"
2734   "lxsi<wd>zx %x0,%y1"
2735   [(set_attr "type" "fpload")])
2736
2737 (define_insn "p9_stxsi<wd>x"
2738   [(set (match_operand:QHI 0 "reg_or_indexed_operand" "=r,Z")
2739         (unspec:QHI [(match_operand:DI 1 "vsx_register_operand" "wi,wi")]
2740                     UNSPEC_P9_MEMORY))]
2741   "TARGET_P9_VECTOR"
2742   "@
2743    mfvsrd %0,%x1
2744    stxsi<wd>x %x1,%y0"
2745   [(set_attr "type" "mffgpr,fpstore")])