main/gcc/config/rs6000/vsx.md

   1 ;; VSX patterns.
   2 ;; Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
   4
   5 ;; This file is part of GCC.
   6
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published
   9 ;; by the Free Software Foundation; either version 3, or (at your
  10 ;; option) any later version.
  11
  12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  15 ;; License for more details.
  16
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Iterator for both scalar and vector floating point types supported by VSX
  22 (define_mode_iterator VSX_B [DF V4SF V2DF])
  23
  24 ;; Iterator for the 2 64-bit vector types
  25 (define_mode_iterator VSX_D [V2DF V2DI])
  26
  27 ;; Iterator for the 2 32-bit vector types
  28 (define_mode_iterator VSX_W [V4SF V4SI])
  29
  30 ;; Iterator for the DF types
  31 (define_mode_iterator VSX_DF [V2DF DF])
  32
  33 ;; Iterator for vector floating point types supported by VSX
  34 (define_mode_iterator VSX_F [V4SF V2DF])
  35
  36 ;; Iterator for logical types supported by VSX
  37 (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
  38
  39 ;; Iterator for memory move.  Handle TImode specially to allow
  40 ;; it to use gprs as well as vsx registers.
  41 (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
  42
  43 (define_mode_iterator VSX_M2 [V16QI
  44                               V8HI
  45                               V4SI
  46                               V2DI
  47                               V4SF
  48                               V2DF
  49                               V1TI
  50                               (TI       "TARGET_VSX_TIMODE")])
  51
  52 ;; Map into the appropriate load/store name based on the type
  53 (define_mode_attr VSm  [(V16QI "vw4")
  54                         (V8HI  "vw4")
  55                         (V4SI  "vw4")
  56                         (V4SF  "vw4")
  57                         (V2DF  "vd2")
  58                         (V2DI  "vd2")
  59                         (DF    "d")
  60                         (V1TI  "vd2")
  61                         (TI    "vd2")])
  62
  63 ;; Map into the appropriate suffix based on the type
  64 (define_mode_attr VSs   [(V16QI "sp")
  65                          (V8HI  "sp")
  66                          (V4SI  "sp")
  67                          (V4SF  "sp")
  68                          (V2DF  "dp")
  69                          (V2DI  "dp")
  70                          (DF    "dp")
  71                          (SF    "sp")
  72                          (V1TI  "dp")
  73                          (TI    "dp")])
  74
  75 ;; Map the register class used
  76 (define_mode_attr VSr   [(V16QI "v")
  77                          (V8HI  "v")
  78                          (V4SI  "v")
  79                          (V4SF  "wf")
  80                          (V2DI  "wd")
  81                          (V2DF  "wd")
  82                          (DF    "ws")
  83                          (SF    "d")
  84                          (V1TI  "v")
  85                          (TI    "wt")])
  86
  87 ;; Map the register class used for float<->int conversions
  88 (define_mode_attr VSr2  [(V2DF  "wd")
  89                          (V4SF  "wf")
  90                          (DF    "ws")])
  91
  92 (define_mode_attr VSr3  [(V2DF  "wa")
  93                          (V4SF  "wa")
  94                          (DF    "ws")])
  95
  96 ;; Map the register class for sp<->dp float conversions, destination
  97 (define_mode_attr VSr4  [(SF    "ws")
  98                          (DF    "f")
  99                          (V2DF  "wd")
 100                          (V4SF  "v")])
 101
 102 ;; Map the register class for sp<->dp float conversions, destination
 103 (define_mode_attr VSr5  [(SF    "ws")
 104                          (DF    "f")
 105                          (V2DF  "v")
 106                          (V4SF  "wd")])
 107
 108 ;; Same size integer type for floating point data
 109 (define_mode_attr VSi [(V4SF  "v4si")
 110                        (V2DF  "v2di")
 111                        (DF    "di")])
 112
 113 (define_mode_attr VSI [(V4SF  "V4SI")
 114                        (V2DF  "V2DI")
 115                        (DF    "DI")])
 116
 117 ;; Word size for same size conversion
 118 (define_mode_attr VSc [(V4SF "w")
 119                        (V2DF "d")
 120                        (DF   "d")])
 121
 122 ;; Map into either s or v, depending on whether this is a scalar or vector
 123 ;; operation
 124 (define_mode_attr VSv   [(V16QI "v")
 125                          (V8HI  "v")
 126                          (V4SI  "v")
 127                          (V4SF  "v")
 128                          (V2DI  "v")
 129                          (V2DF  "v")
 130                          (V1TI  "v")
 131                          (DF    "s")])
 132
 133 ;; Appropriate type for add ops (and other simple FP ops)
 134 (define_mode_attr VStype_simple [(V2DF "vecdouble")
 135                                  (V4SF "vecfloat")
 136                                  (DF   "fp")])
 137
 138 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
 139                                    (V4SF "fp_addsub_s")
 140                                    (DF   "fp_addsub_d")])
 141
 142 ;; Appropriate type for multiply ops
 143 (define_mode_attr VStype_mul    [(V2DF "vecdouble")
 144                                  (V4SF "vecfloat")
 145                                  (DF   "dmul")])
 146
 147 (define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
 148                                  (V4SF "fp_mul_s")
 149                                  (DF   "fp_mul_d")])
 150
 151 ;; Appropriate type for divide ops.
 152 (define_mode_attr VStype_div    [(V2DF "vecdiv")
 153                                  (V4SF "vecfdiv")
 154                                  (DF   "ddiv")])
 155
 156 (define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
 157                                  (V4SF "fp_div_s")
 158                                  (DF   "fp_div_d")])
 159
 160 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 161 ;; the scalar sqrt
 162 (define_mode_attr VStype_sqrt   [(V2DF "dsqrt")
 163                                  (V4SF "ssqrt")
 164                                  (DF   "dsqrt")])
 165
 166 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
 167                                  (V4SF "fp_sqrt_s")
 168                                  (DF   "fp_sqrt_d")])
 169
 170 ;; Iterator and modes for sp<->dp conversions
 171 ;; Because scalar SF values are represented internally as double, use the
 172 ;; V4SF type to represent this than SF.
 173 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
 174
 175 (define_mode_attr VS_spdp_res [(DF      "V4SF")
 176                                (V4SF    "V2DF")
 177                                (V2DF    "V4SF")])
 178
 179 (define_mode_attr VS_spdp_insn [(DF     "xscvdpsp")
 180                                 (V4SF   "xvcvspdp")
 181                                 (V2DF   "xvcvdpsp")])
 182
 183 (define_mode_attr VS_spdp_type [(DF     "fp")
 184                                 (V4SF   "vecdouble")
 185                                 (V2DF   "vecdouble")])
 186
 187 ;; Map the scalar mode for a vector type
 188 (define_mode_attr VS_scalar [(V1TI      "TI")
 189                              (V2DF      "DF")
 190                              (V2DI      "DI")
 191                              (V4SF      "SF")
 192                              (V4SI      "SI")
 193                              (V8HI      "HI")
 194                              (V16QI     "QI")])
 195
 196 ;; Map to a double-sized vector mode
 197 (define_mode_attr VS_double [(V4SI      "V8SI")
 198                              (V4SF      "V8SF")
 199                              (V2DI      "V4DI")
 200                              (V2DF      "V4DF")
 201                              (V1TI      "V2TI")])
 202
 203 ;; Constants for creating unspecs
 204 (define_c_enum "unspec"
 205   [UNSPEC_VSX_CONCAT
 206    UNSPEC_VSX_CVDPSXWS
 207    UNSPEC_VSX_CVDPUXWS
 208    UNSPEC_VSX_CVSPDP
 209    UNSPEC_VSX_CVSPDPN
 210    UNSPEC_VSX_CVDPSPN
 211    UNSPEC_VSX_CVSXWDP
 212    UNSPEC_VSX_CVUXWDP
 213    UNSPEC_VSX_CVSXDSP
 214    UNSPEC_VSX_CVUXDSP
 215    UNSPEC_VSX_CVSPSXDS
 216    UNSPEC_VSX_CVSPUXDS
 217    UNSPEC_VSX_TDIV
 218    UNSPEC_VSX_TSQRT
 219    UNSPEC_VSX_SET
 220    UNSPEC_VSX_ROUND_I
 221    UNSPEC_VSX_ROUND_IC
 222    UNSPEC_VSX_SLDWI
 223    UNSPEC_VSX_XXSPLTW
 224   ])
 225
 226 ;; VSX moves
 227
 228 ;; The patterns for LE permuted loads and stores come before the general
 229 ;; VSX moves so they match first.
 230 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 231   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
 232         (match_operand:VSX_D 1 "memory_operand" "Z"))]
 233   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 234   "#"
 235   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 236   [(set (match_dup 2)
 237         (vec_select:<MODE>
 238           (match_dup 1)
 239           (parallel [(const_int 1) (const_int 0)])))
 240    (set (match_dup 0)
 241         (vec_select:<MODE>
 242           (match_dup 2)
 243           (parallel [(const_int 1) (const_int 0)])))]
 244   "
 245 {
 246   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 247                                        : operands[0];
 248 }
 249   "
 250   [(set_attr "type" "vecload")
 251    (set_attr "length" "8")])
 252
 253 (define_insn_and_split "*vsx_le_perm_load_<mode>"
 254   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
 255         (match_operand:VSX_W 1 "memory_operand" "Z"))]
 256   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 257   "#"
 258   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 259   [(set (match_dup 2)
 260         (vec_select:<MODE>
 261           (match_dup 1)
 262           (parallel [(const_int 2) (const_int 3)
 263                      (const_int 0) (const_int 1)])))
 264    (set (match_dup 0)
 265         (vec_select:<MODE>
 266           (match_dup 2)
 267           (parallel [(const_int 2) (const_int 3)
 268                      (const_int 0) (const_int 1)])))]
 269   "
 270 {
 271   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 272                                        : operands[0];
 273 }
 274   "
 275   [(set_attr "type" "vecload")
 276    (set_attr "length" "8")])
 277
 278 (define_insn_and_split "*vsx_le_perm_load_v8hi"
 279   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
 280         (match_operand:V8HI 1 "memory_operand" "Z"))]
 281   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 282   "#"
 283   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 284   [(set (match_dup 2)
 285         (vec_select:V8HI
 286           (match_dup 1)
 287           (parallel [(const_int 4) (const_int 5)
 288                      (const_int 6) (const_int 7)
 289                      (const_int 0) (const_int 1)
 290                      (const_int 2) (const_int 3)])))
 291    (set (match_dup 0)
 292         (vec_select:V8HI
 293           (match_dup 2)
 294           (parallel [(const_int 4) (const_int 5)
 295                      (const_int 6) (const_int 7)
 296                      (const_int 0) (const_int 1)
 297                      (const_int 2) (const_int 3)])))]
 298   "
 299 {
 300   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 301                                        : operands[0];
 302 }
 303   "
 304   [(set_attr "type" "vecload")
 305    (set_attr "length" "8")])
 306
 307 (define_insn_and_split "*vsx_le_perm_load_v16qi"
 308   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
 309         (match_operand:V16QI 1 "memory_operand" "Z"))]
 310   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 311   "#"
 312   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 313   [(set (match_dup 2)
 314         (vec_select:V16QI
 315           (match_dup 1)
 316           (parallel [(const_int 8) (const_int 9)
 317                      (const_int 10) (const_int 11)
 318                      (const_int 12) (const_int 13)
 319                      (const_int 14) (const_int 15)
 320                      (const_int 0) (const_int 1)
 321                      (const_int 2) (const_int 3)
 322                      (const_int 4) (const_int 5)
 323                      (const_int 6) (const_int 7)])))
 324    (set (match_dup 0)
 325         (vec_select:V16QI
 326           (match_dup 2)
 327           (parallel [(const_int 8) (const_int 9)
 328                      (const_int 10) (const_int 11)
 329                      (const_int 12) (const_int 13)
 330                      (const_int 14) (const_int 15)
 331                      (const_int 0) (const_int 1)
 332                      (const_int 2) (const_int 3)
 333                      (const_int 4) (const_int 5)
 334                      (const_int 6) (const_int 7)])))]
 335   "
 336 {
 337   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
 338                                        : operands[0];
 339 }
 340   "
 341   [(set_attr "type" "vecload")
 342    (set_attr "length" "8")])
 343
 344 (define_insn "*vsx_le_perm_store_<mode>"
 345   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
 346         (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
 347   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 348   "#"
 349   [(set_attr "type" "vecstore")
 350    (set_attr "length" "12")])
 351
 352 (define_split
 353   [(set (match_operand:VSX_D 0 "memory_operand" "")
 354         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 355   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 356   [(set (match_dup 2)
 357         (vec_select:<MODE>
 358           (match_dup 1)
 359           (parallel [(const_int 1) (const_int 0)])))
 360    (set (match_dup 0)
 361         (vec_select:<MODE>
 362           (match_dup 2)
 363           (parallel [(const_int 1) (const_int 0)])))]
 364 {
 365   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 366                                        : operands[1];
 367 })
 368
 369 ;; The post-reload split requires that we re-permute the source
 370 ;; register in case it is still live.
 371 (define_split
 372   [(set (match_operand:VSX_D 0 "memory_operand" "")
 373         (match_operand:VSX_D 1 "vsx_register_operand" ""))]
 374   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 375   [(set (match_dup 1)
 376         (vec_select:<MODE>
 377           (match_dup 1)
 378           (parallel [(const_int 1) (const_int 0)])))
 379    (set (match_dup 0)
 380         (vec_select:<MODE>
 381           (match_dup 1)
 382           (parallel [(const_int 1) (const_int 0)])))
 383    (set (match_dup 1)
 384         (vec_select:<MODE>
 385           (match_dup 1)
 386           (parallel [(const_int 1) (const_int 0)])))]
 387   "")
 388
 389 (define_insn "*vsx_le_perm_store_<mode>"
 390   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 391         (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
 392   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 393   "#"
 394   [(set_attr "type" "vecstore")
 395    (set_attr "length" "12")])
 396
 397 (define_split
 398   [(set (match_operand:VSX_W 0 "memory_operand" "")
 399         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 400   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 401   [(set (match_dup 2)
 402         (vec_select:<MODE>
 403           (match_dup 1)
 404           (parallel [(const_int 2) (const_int 3)
 405                      (const_int 0) (const_int 1)])))
 406    (set (match_dup 0)
 407         (vec_select:<MODE>
 408           (match_dup 2)
 409           (parallel [(const_int 2) (const_int 3)
 410                      (const_int 0) (const_int 1)])))]
 411 {
 412   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 413                                        : operands[1];
 414 })
 415
 416 ;; The post-reload split requires that we re-permute the source
 417 ;; register in case it is still live.
 418 (define_split
 419   [(set (match_operand:VSX_W 0 "memory_operand" "")
 420         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
 421   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 422   [(set (match_dup 1)
 423         (vec_select:<MODE>
 424           (match_dup 1)
 425           (parallel [(const_int 2) (const_int 3)
 426                      (const_int 0) (const_int 1)])))
 427    (set (match_dup 0)
 428         (vec_select:<MODE>
 429           (match_dup 1)
 430           (parallel [(const_int 2) (const_int 3)
 431                      (const_int 0) (const_int 1)])))
 432    (set (match_dup 1)
 433         (vec_select:<MODE>
 434           (match_dup 1)
 435           (parallel [(const_int 2) (const_int 3)
 436                      (const_int 0) (const_int 1)])))]
 437   "")
 438
 439 (define_insn "*vsx_le_perm_store_v8hi"
 440   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
 441         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
 442   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 443   "#"
 444   [(set_attr "type" "vecstore")
 445    (set_attr "length" "12")])
 446
 447 (define_split
 448   [(set (match_operand:V8HI 0 "memory_operand" "")
 449         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 450   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 451   [(set (match_dup 2)
 452         (vec_select:V8HI
 453           (match_dup 1)
 454           (parallel [(const_int 4) (const_int 5)
 455                      (const_int 6) (const_int 7)
 456                      (const_int 0) (const_int 1)
 457                      (const_int 2) (const_int 3)])))
 458    (set (match_dup 0)
 459         (vec_select:V8HI
 460           (match_dup 2)
 461           (parallel [(const_int 4) (const_int 5)
 462                      (const_int 6) (const_int 7)
 463                      (const_int 0) (const_int 1)
 464                      (const_int 2) (const_int 3)])))]
 465 {
 466   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 467                                        : operands[1];
 468 })
 469
 470 ;; The post-reload split requires that we re-permute the source
 471 ;; register in case it is still live.
 472 (define_split
 473   [(set (match_operand:V8HI 0 "memory_operand" "")
 474         (match_operand:V8HI 1 "vsx_register_operand" ""))]
 475   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 476   [(set (match_dup 1)
 477         (vec_select:V8HI
 478           (match_dup 1)
 479           (parallel [(const_int 4) (const_int 5)
 480                      (const_int 6) (const_int 7)
 481                      (const_int 0) (const_int 1)
 482                      (const_int 2) (const_int 3)])))
 483    (set (match_dup 0)
 484         (vec_select:V8HI
 485           (match_dup 1)
 486           (parallel [(const_int 4) (const_int 5)
 487                      (const_int 6) (const_int 7)
 488                      (const_int 0) (const_int 1)
 489                      (const_int 2) (const_int 3)])))
 490    (set (match_dup 1)
 491         (vec_select:V8HI
 492           (match_dup 1)
 493           (parallel [(const_int 4) (const_int 5)
 494                      (const_int 6) (const_int 7)
 495                      (const_int 0) (const_int 1)
 496                      (const_int 2) (const_int 3)])))]
 497   "")
 498
 499 (define_insn "*vsx_le_perm_store_v16qi"
 500   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
 501         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
 502   "!BYTES_BIG_ENDIAN && TARGET_VSX"
 503   "#"
 504   [(set_attr "type" "vecstore")
 505    (set_attr "length" "12")])
 506
 507 (define_split
 508   [(set (match_operand:V16QI 0 "memory_operand" "")
 509         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 510   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
 511   [(set (match_dup 2)
 512         (vec_select:V16QI
 513           (match_dup 1)
 514           (parallel [(const_int 8) (const_int 9)
 515                      (const_int 10) (const_int 11)
 516                      (const_int 12) (const_int 13)
 517                      (const_int 14) (const_int 15)
 518                      (const_int 0) (const_int 1)
 519                      (const_int 2) (const_int 3)
 520                      (const_int 4) (const_int 5)
 521                      (const_int 6) (const_int 7)])))
 522    (set (match_dup 0)
 523         (vec_select:V16QI
 524           (match_dup 2)
 525           (parallel [(const_int 8) (const_int 9)
 526                      (const_int 10) (const_int 11)
 527                      (const_int 12) (const_int 13)
 528                      (const_int 14) (const_int 15)
 529                      (const_int 0) (const_int 1)
 530                      (const_int 2) (const_int 3)
 531                      (const_int 4) (const_int 5)
 532                      (const_int 6) (const_int 7)])))]
 533 {
 534   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
 535                                        : operands[1];
 536 })
 537
 538 ;; The post-reload split requires that we re-permute the source
 539 ;; register in case it is still live.
 540 (define_split
 541   [(set (match_operand:V16QI 0 "memory_operand" "")
 542         (match_operand:V16QI 1 "vsx_register_operand" ""))]
 543   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
 544   [(set (match_dup 1)
 545         (vec_select:V16QI
 546           (match_dup 1)
 547           (parallel [(const_int 8) (const_int 9)
 548                      (const_int 10) (const_int 11)
 549                      (const_int 12) (const_int 13)
 550                      (const_int 14) (const_int 15)
 551                      (const_int 0) (const_int 1)
 552                      (const_int 2) (const_int 3)
 553                      (const_int 4) (const_int 5)
 554                      (const_int 6) (const_int 7)])))
 555    (set (match_dup 0)
 556         (vec_select:V16QI
 557           (match_dup 1)
 558           (parallel [(const_int 8) (const_int 9)
 559                      (const_int 10) (const_int 11)
 560                      (const_int 12) (const_int 13)
 561                      (const_int 14) (const_int 15)
 562                      (const_int 0) (const_int 1)
 563                      (const_int 2) (const_int 3)
 564                      (const_int 4) (const_int 5)
 565                      (const_int 6) (const_int 7)])))
 566    (set (match_dup 1)
 567         (vec_select:V16QI
 568           (match_dup 1)
 569           (parallel [(const_int 8) (const_int 9)
 570                      (const_int 10) (const_int 11)
 571                      (const_int 12) (const_int 13)
 572                      (const_int 14) (const_int 15)
 573                      (const_int 0) (const_int 1)
 574                      (const_int 2) (const_int 3)
 575                      (const_int 4) (const_int 5)
 576                      (const_int 6) (const_int 7)])))]
 577   "")
 578
 579
 580 (define_insn "*vsx_mov<mode>"
 581   [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
 582         (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
 583   "VECTOR_MEM_VSX_P (<MODE>mode)
 584    && (register_operand (operands[0], <MODE>mode)
 585        || register_operand (operands[1], <MODE>mode))"
 586 {
 587   return rs6000_output_move_128bit (operands);
 588 }
 589   [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
 590    (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
 591
 592 ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
 593 ;; use of TImode is for unions.  However for plain data movement, slightly
 594 ;; favor the vector loads
 595 (define_insn "*vsx_movti_64bit"
 596   [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
 597         (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
 598   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
 599    && (register_operand (operands[0], TImode)
 600        || register_operand (operands[1], TImode))"
 601 {
 602   return rs6000_output_move_128bit (operands);
 603 }
 604   [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
 605    (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
 606
 607 (define_insn "*vsx_movti_32bit"
 608   [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
 609         (match_operand:TI 1 "input_operand"        "wa, Z,wa, O,W,wZ, v,r,r,    Q,    Y,    r,n"))]
 610   "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
 611    && (register_operand (operands[0], TImode)
 612        || register_operand (operands[1], TImode))"
 613 {
 614   switch (which_alternative)
 615     {
 616     case 0:
 617       return "stxvd2x %x1,%y0";
 618
 619     case 1:
 620       return "lxvd2x %x0,%y1";
 621
 622     case 2:
 623       return "xxlor %x0,%x1,%x1";
 624
 625     case 3:
 626       return "xxlxor %x0,%x0,%x0";
 627
 628     case 4:
 629       return output_vec_const_move (operands);
 630
 631     case 5:
 632       return "stvx %1,%y0";
 633
 634     case 6:
 635       return "lvx %0,%y1";
 636
 637     case 7:
 638       if (TARGET_STRING)
 639         return \"stswi %1,%P0,16\";
 640
 641     case 8:
 642       return \"#\";
 643
 644     case 9:
 645       /* If the address is not used in the output, we can use lsi.  Otherwise,
 646          fall through to generating four loads.  */
 647       if (TARGET_STRING
 648           && ! reg_overlap_mentioned_p (operands[0], operands[1]))
 649         return \"lswi %0,%P1,16\";
 650       /* ... fall through ...  */
 651
 652     case 10:
 653     case 11:
 654     case 12:
 655       return \"#\";
 656     default:
 657       gcc_unreachable ();
 658     }
 659 }
 660   [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
 661    (set_attr "length" "     4,      4,        4,       4,         8,       4,      4,      16,      16,     16,     16,16,16")
 662    (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
 663                                           (const_string "always")
 664                                           (const_string "conditional")))])
 665
 666 ;; Explicit  load/store expanders for the builtin functions
 667 (define_expand "vsx_load_<mode>"
 668   [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
 669         (match_operand:VSX_M 1 "memory_operand" ""))]
 670   "VECTOR_MEM_VSX_P (<MODE>mode)"
 671   "")
 672
 673 (define_expand "vsx_store_<mode>"
 674   [(set (match_operand:VSX_M 0 "memory_operand" "")
 675         (match_operand:VSX_M 1 "vsx_register_operand" ""))]
 676   "VECTOR_MEM_VSX_P (<MODE>mode)"
 677   "")
 678
 679 \f
 680 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
 681 ;; instructions are now combined with the insn for the traditional floating
 682 ;; point unit.
 683 (define_insn "*vsx_add<mode>3"
 684   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 685         (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 686                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 687   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 688   "xvadd<VSs> %x0,%x1,%x2"
 689   [(set_attr "type" "<VStype_simple>")
 690    (set_attr "fp_type" "<VSfptype_simple>")])
 691
 692 (define_insn "*vsx_sub<mode>3"
 693   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 694         (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 695                      (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 696   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 697   "xvsub<VSs> %x0,%x1,%x2"
 698   [(set_attr "type" "<VStype_simple>")
 699    (set_attr "fp_type" "<VSfptype_simple>")])
 700
 701 (define_insn "*vsx_mul<mode>3"
 702   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 703         (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 704                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 705   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 706   "xvmul<VSs> %x0,%x1,%x2"
 707   [(set_attr "type" "<VStype_simple>")
 708    (set_attr "fp_type" "<VSfptype_mul>")])
 709
 710 (define_insn "*vsx_div<mode>3"
 711   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 712         (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 713                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 714   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 715   "xvdiv<VSs> %x0,%x1,%x2"
 716   [(set_attr "type" "<VStype_div>")
 717    (set_attr "fp_type" "<VSfptype_div>")])
 718
 719 ;; *tdiv* instruction returning the FG flag
 720 (define_expand "vsx_tdiv<mode>3_fg"
 721   [(set (match_dup 3)
 722         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
 723                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
 724                      UNSPEC_VSX_TDIV))
 725    (set (match_operand:SI 0 "gpc_reg_operand" "")
 726         (gt:SI (match_dup 3)
 727                (const_int 0)))]
 728   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 729 {
 730   operands[3] = gen_reg_rtx (CCFPmode);
 731 })
 732
 733 ;; *tdiv* instruction returning the FE flag
 734 (define_expand "vsx_tdiv<mode>3_fe"
 735   [(set (match_dup 3)
 736         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
 737                       (match_operand:VSX_B 2 "vsx_register_operand" "")]
 738                      UNSPEC_VSX_TDIV))
 739    (set (match_operand:SI 0 "gpc_reg_operand" "")
 740         (eq:SI (match_dup 3)
 741                (const_int 0)))]
 742   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 743 {
 744   operands[3] = gen_reg_rtx (CCFPmode);
 745 })
 746
 747 (define_insn "*vsx_tdiv<mode>3_internal"
 748   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
 749         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
 750                       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
 751                    UNSPEC_VSX_TDIV))]
 752   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 753   "x<VSv>tdiv<VSs> %0,%x1,%x2"
 754   [(set_attr "type" "<VStype_simple>")
 755    (set_attr "fp_type" "<VSfptype_simple>")])
 756
 757 (define_insn "vsx_fre<mode>2"
 758   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 759         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
 760                       UNSPEC_FRES))]
 761   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 762   "xvre<VSs> %x0,%x1"
 763   [(set_attr "type" "<VStype_simple>")
 764    (set_attr "fp_type" "<VSfptype_simple>")])
 765
 766 (define_insn "*vsx_neg<mode>2"
 767   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 768         (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
 769   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 770   "xvneg<VSs> %x0,%x1"
 771   [(set_attr "type" "<VStype_simple>")
 772    (set_attr "fp_type" "<VSfptype_simple>")])
 773
 774 (define_insn "*vsx_abs<mode>2"
 775   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 776         (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
 777   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 778   "xvabs<VSs> %x0,%x1"
 779   [(set_attr "type" "<VStype_simple>")
 780    (set_attr "fp_type" "<VSfptype_simple>")])
 781
 782 (define_insn "vsx_nabs<mode>2"
 783   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 784         (neg:VSX_F
 785          (abs:VSX_F
 786           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))]
 787   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 788   "xvnabs<VSs> %x0,%x1"
 789   [(set_attr "type" "<VStype_simple>")
 790    (set_attr "fp_type" "<VSfptype_simple>")])
 791
 792 (define_insn "vsx_smax<mode>3"
 793   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 794         (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 795                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 796   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 797   "xvmax<VSs> %x0,%x1,%x2"
 798   [(set_attr "type" "<VStype_simple>")
 799    (set_attr "fp_type" "<VSfptype_simple>")])
 800
 801 (define_insn "*vsx_smin<mode>3"
 802   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 803         (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 804                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 805   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 806   "xvmin<VSs> %x0,%x1,%x2"
 807   [(set_attr "type" "<VStype_simple>")
 808    (set_attr "fp_type" "<VSfptype_simple>")])
 809
 810 (define_insn "*vsx_sqrt<mode>2"
 811   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 812         (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
 813   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 814   "xvsqrt<VSs> %x0,%x1"
 815   [(set_attr "type" "<VStype_sqrt>")
 816    (set_attr "fp_type" "<VSfptype_sqrt>")])
 817
 818 (define_insn "*vsx_rsqrte<mode>2"
 819   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 820         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
 821                       UNSPEC_RSQRT))]
 822   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 823   "xvrsqrte<VSs> %x0,%x1"
 824   [(set_attr "type" "<VStype_simple>")
 825    (set_attr "fp_type" "<VSfptype_simple>")])
 826
 827 ;; *tsqrt* returning the fg flag
 828 (define_expand "vsx_tsqrt<mode>2_fg"
 829   [(set (match_dup 3)
 830         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
 831                      UNSPEC_VSX_TSQRT))
 832    (set (match_operand:SI 0 "gpc_reg_operand" "")
 833         (gt:SI (match_dup 3)
 834                (const_int 0)))]
 835   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 836 {
 837   operands[3] = gen_reg_rtx (CCFPmode);
 838 })
 839
 840 ;; *tsqrt* returning the fe flag
 841 (define_expand "vsx_tsqrt<mode>2_fe"
 842   [(set (match_dup 3)
 843         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
 844                      UNSPEC_VSX_TSQRT))
 845    (set (match_operand:SI 0 "gpc_reg_operand" "")
 846         (eq:SI (match_dup 3)
 847                (const_int 0)))]
 848   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 849 {
 850   operands[3] = gen_reg_rtx (CCFPmode);
 851 })
 852
 853 (define_insn "*vsx_tsqrt<mode>2_internal"
 854   [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
 855         (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
 856                      UNSPEC_VSX_TSQRT))]
 857   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 858   "x<VSv>tsqrt<VSs> %0,%x1"
 859   [(set_attr "type" "<VStype_simple>")
 860    (set_attr "fp_type" "<VSfptype_simple>")])
 861
 862 ;; Fused vector multiply/add instructions. Support the classical Altivec
 863 ;; versions of fma, which allows the target to be a separate register from the
 864 ;; 3 inputs.  Under VSX, the target must be either the addend or the first
 865 ;; multiply.
 866
 867 (define_insn "*vsx_fmav4sf4"
 868   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
 869         (fma:V4SF
 870           (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v")
 871           (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v")
 872           (match_operand:V4SF 3 "vsx_register_operand" "0,ws,0,wa,v")))]
 873   "VECTOR_UNIT_VSX_P (V4SFmode)"
 874   "@
 875    xvmaddasp %x0,%x1,%x2
 876    xvmaddmsp %x0,%x1,%x3
 877    xvmaddasp %x0,%x1,%x2
 878    xvmaddmsp %x0,%x1,%x3
 879    vmaddfp %0,%1,%2,%3"
 880   [(set_attr "type" "vecfloat")])
 881
 882 (define_insn "*vsx_fmav2df4"
 883   [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa")
 884         (fma:V2DF
 885           (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa")
 886           (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0")
 887           (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))]
 888   "VECTOR_UNIT_VSX_P (V2DFmode)"
 889   "@
 890    xvmaddadp %x0,%x1,%x2
 891    xvmaddmdp %x0,%x1,%x3
 892    xvmaddadp %x0,%x1,%x2
 893    xvmaddmdp %x0,%x1,%x3"
 894   [(set_attr "type" "vecdouble")])
 895
 896 (define_insn "*vsx_fms<mode>4"
 897   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
 898         (fma:VSX_F
 899           (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
 900           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
 901           (neg:VSX_F
 902             (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
 903   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 904   "@
 905    xvmsuba<VSs> %x0,%x1,%x2
 906    xvmsubm<VSs> %x0,%x1,%x3
 907    xvmsuba<VSs> %x0,%x1,%x2
 908    xvmsubm<VSs> %x0,%x1,%x3"
 909   [(set_attr "type" "<VStype_mul>")])
 910
 911 (define_insn "*vsx_nfma<mode>4"
 912   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
 913         (neg:VSX_F
 914          (fma:VSX_F
 915           (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
 916           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
 917           (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
 918   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 919   "@
 920    xvnmadda<VSs> %x0,%x1,%x2
 921    xvnmaddm<VSs> %x0,%x1,%x3
 922    xvnmadda<VSs> %x0,%x1,%x2
 923    xvnmaddm<VSs> %x0,%x1,%x3"
 924   [(set_attr "type" "<VStype_mul>")
 925    (set_attr "fp_type" "<VSfptype_mul>")])
 926
 927 (define_insn "*vsx_nfmsv4sf4"
 928   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
 929         (neg:V4SF
 930          (fma:V4SF
 931            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
 932            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
 933            (neg:V4SF
 934              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
 935   "VECTOR_UNIT_VSX_P (V4SFmode)"
 936   "@
 937    xvnmsubasp %x0,%x1,%x2
 938    xvnmsubmsp %x0,%x1,%x3
 939    xvnmsubasp %x0,%x1,%x2
 940    xvnmsubmsp %x0,%x1,%x3
 941    vnmsubfp %0,%1,%2,%3"
 942   [(set_attr "type" "vecfloat")])
 943
 944 (define_insn "*vsx_nfmsv2df4"
 945   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
 946         (neg:V2DF
 947          (fma:V2DF
 948            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
 949            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
 950            (neg:V2DF
 951              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
 952   "VECTOR_UNIT_VSX_P (V2DFmode)"
 953   "@
 954    xvnmsubadp %x0,%x1,%x2
 955    xvnmsubmdp %x0,%x1,%x3
 956    xvnmsubadp %x0,%x1,%x2
 957    xvnmsubmdp %x0,%x1,%x3"
 958   [(set_attr "type" "vecdouble")])
 959
 960 ;; Vector conditional expressions (no scalar version for these instructions)
 961 (define_insn "vsx_eq<mode>"
 962   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 963         (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 964                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 965   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 966   "xvcmpeq<VSs> %x0,%x1,%x2"
 967   [(set_attr "type" "<VStype_simple>")
 968    (set_attr "fp_type" "<VSfptype_simple>")])
 969
 970 (define_insn "vsx_gt<mode>"
 971   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 972         (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 973                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 974   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 975   "xvcmpgt<VSs> %x0,%x1,%x2"
 976   [(set_attr "type" "<VStype_simple>")
 977    (set_attr "fp_type" "<VSfptype_simple>")])
 978
 979 (define_insn "*vsx_ge<mode>"
 980   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 981         (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
 982                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
 983   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 984   "xvcmpge<VSs> %x0,%x1,%x2"
 985   [(set_attr "type" "<VStype_simple>")
 986    (set_attr "fp_type" "<VSfptype_simple>")])
 987
 988 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
 989 ;; indicate a combined status
 990 (define_insn "*vsx_eq_<mode>_p"
 991   [(set (reg:CC 74)
 992         (unspec:CC
 993          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
 994                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
 995          UNSPEC_PREDICATE))
 996    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
 997         (eq:VSX_F (match_dup 1)
 998                   (match_dup 2)))]
 999   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1000   "xvcmpeq<VSs>. %x0,%x1,%x2"
1001   [(set_attr "type" "<VStype_simple>")])
1002
1003 (define_insn "*vsx_gt_<mode>_p"
1004   [(set (reg:CC 74)
1005         (unspec:CC
1006          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
1007                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
1008          UNSPEC_PREDICATE))
1009    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1010         (gt:VSX_F (match_dup 1)
1011                   (match_dup 2)))]
1012   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1013   "xvcmpgt<VSs>. %x0,%x1,%x2"
1014   [(set_attr "type" "<VStype_simple>")])
1015
1016 (define_insn "*vsx_ge_<mode>_p"
1017   [(set (reg:CC 74)
1018         (unspec:CC
1019          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
1020                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
1021          UNSPEC_PREDICATE))
1022    (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1023         (ge:VSX_F (match_dup 1)
1024                   (match_dup 2)))]
1025   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1026   "xvcmpge<VSs>. %x0,%x1,%x2"
1027   [(set_attr "type" "<VStype_simple>")])
1028
1029 ;; Vector select
1030 (define_insn "*vsx_xxsel<mode>"
1031   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1032         (if_then_else:VSX_L
1033          (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1034                 (match_operand:VSX_L 4 "zero_constant" ""))
1035          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
1036          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
1037   "VECTOR_MEM_VSX_P (<MODE>mode)"
1038   "xxsel %x0,%x3,%x2,%x1"
1039   [(set_attr "type" "vecperm")])
1040
1041 (define_insn "*vsx_xxsel<mode>_uns"
1042   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1043         (if_then_else:VSX_L
1044          (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1045                    (match_operand:VSX_L 4 "zero_constant" ""))
1046          (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
1047          (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
1048   "VECTOR_MEM_VSX_P (<MODE>mode)"
1049   "xxsel %x0,%x3,%x2,%x1"
1050   [(set_attr "type" "vecperm")])
1051
1052 ;; Copy sign
1053 (define_insn "vsx_copysign<mode>3"
1054   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1055         (unspec:VSX_F
1056          [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
1057           (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")]
1058          UNSPEC_COPYSIGN))]
1059   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1060   "xvcpsgn<VSs> %x0,%x2,%x1"
1061   [(set_attr "type" "<VStype_simple>")
1062    (set_attr "fp_type" "<VSfptype_simple>")])
1063
1064 ;; For the conversions, limit the register class for the integer value to be
1065 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1066 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1067 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1068 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md.
1069 (define_insn "vsx_float<VSi><mode>2"
1070   [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
1071         (float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1072   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1073   "x<VSv>cvsx<VSc><VSs> %x0,%x1"
1074   [(set_attr "type" "<VStype_simple>")
1075    (set_attr "fp_type" "<VSfptype_simple>")])
1076
1077 (define_insn "vsx_floatuns<VSi><mode>2"
1078   [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
1079         (unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1080   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1081   "x<VSv>cvux<VSc><VSs> %x0,%x1"
1082   [(set_attr "type" "<VStype_simple>")
1083    (set_attr "fp_type" "<VSfptype_simple>")])
1084
1085 (define_insn "vsx_fix_trunc<mode><VSi>2"
1086   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1087         (fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
1088   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1089   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1090   [(set_attr "type" "<VStype_simple>")
1091    (set_attr "fp_type" "<VSfptype_simple>")])
1092
1093 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1094   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1095         (unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
1096   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1097   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1098   [(set_attr "type" "<VStype_simple>")
1099    (set_attr "fp_type" "<VSfptype_simple>")])
1100
1101 ;; Math rounding functions
1102 (define_insn "vsx_x<VSv>r<VSs>i"
1103   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
1104         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
1105                       UNSPEC_VSX_ROUND_I))]
1106   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1107   "x<VSv>r<VSs>i %x0,%x1"
1108   [(set_attr "type" "<VStype_simple>")
1109    (set_attr "fp_type" "<VSfptype_simple>")])
1110
1111 (define_insn "vsx_x<VSv>r<VSs>ic"
1112   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
1113         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
1114                       UNSPEC_VSX_ROUND_IC))]
1115   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1116   "x<VSv>r<VSs>ic %x0,%x1"
1117   [(set_attr "type" "<VStype_simple>")
1118    (set_attr "fp_type" "<VSfptype_simple>")])
1119
1120 (define_insn "vsx_btrunc<mode>2"
1121   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1122         (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
1123   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1124   "xvr<VSs>iz %x0,%x1"
1125   [(set_attr "type" "<VStype_simple>")
1126    (set_attr "fp_type" "<VSfptype_simple>")])
1127
1128 (define_insn "*vsx_b2trunc<mode>2"
1129   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
1130         (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
1131                       UNSPEC_FRIZ))]
1132   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1133   "x<VSv>r<VSs>iz %x0,%x1"
1134   [(set_attr "type" "<VStype_simple>")
1135    (set_attr "fp_type" "<VSfptype_simple>")])
1136
1137 (define_insn "vsx_floor<mode>2"
1138   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1139         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
1140                       UNSPEC_FRIM))]
1141   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1142   "xvr<VSs>im %x0,%x1"
1143   [(set_attr "type" "<VStype_simple>")
1144    (set_attr "fp_type" "<VSfptype_simple>")])
1145
1146 (define_insn "vsx_ceil<mode>2"
1147   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1148         (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
1149                       UNSPEC_FRIP))]
1150   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1151   "xvr<VSs>ip %x0,%x1"
1152   [(set_attr "type" "<VStype_simple>")
1153    (set_attr "fp_type" "<VSfptype_simple>")])
1154
1155 \f
1156 ;; VSX convert to/from double vector
1157
1158 ;; Convert between single and double precision
1159 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1160 ;; scalar single precision instructions internally use the double format.
1161 ;; Prefer the altivec registers, since we likely will need to do a vperm
1162 (define_insn "vsx_<VS_spdp_insn>"
1163   [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?wa")
1164         (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,wa")]
1165                               UNSPEC_VSX_CVSPDP))]
1166   "VECTOR_UNIT_VSX_P (<MODE>mode)"
1167   "<VS_spdp_insn> %x0,%x1"
1168   [(set_attr "type" "<VS_spdp_type>")])
1169
1170 ;; xscvspdp, represent the scalar SF type as V4SF
1171 (define_insn "vsx_xscvspdp"
1172   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
1173         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
1174                    UNSPEC_VSX_CVSPDP))]
1175   "VECTOR_UNIT_VSX_P (V4SFmode)"
1176   "xscvspdp %x0,%x1"
1177   [(set_attr "type" "fp")])
1178
1179 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1180 ;; format of scalars is actually DF.
1181 (define_insn "vsx_xscvdpsp_scalar"
1182   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1183         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1184                      UNSPEC_VSX_CVSPDP))]
1185   "VECTOR_UNIT_VSX_P (V4SFmode)"
1186   "xscvdpsp %x0,%x1"
1187   [(set_attr "type" "fp")])
1188
1189 ;; Same as vsx_xscvspdp, but use SF as the type
1190 (define_insn "vsx_xscvspdp_scalar2"
1191   [(set (match_operand:SF 0 "vsx_register_operand" "=f")
1192         (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1193                    UNSPEC_VSX_CVSPDP))]
1194   "VECTOR_UNIT_VSX_P (V4SFmode)"
1195   "xscvspdp %x0,%x1"
1196   [(set_attr "type" "fp")])
1197
1198 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1199 (define_insn "vsx_xscvdpspn"
1200   [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
1201         (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1202                      UNSPEC_VSX_CVDPSPN))]
1203   "TARGET_XSCVDPSPN"
1204   "xscvdpspn %x0,%x1"
1205   [(set_attr "type" "fp")])
1206
1207 (define_insn "vsx_xscvspdpn"
1208   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
1209         (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
1210                    UNSPEC_VSX_CVSPDPN))]
1211   "TARGET_XSCVSPDPN"
1212   "xscvspdpn %x0,%x1"
1213   [(set_attr "type" "fp")])
1214
1215 (define_insn "vsx_xscvdpspn_scalar"
1216   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1217         (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1218                      UNSPEC_VSX_CVDPSPN))]
1219   "TARGET_XSCVDPSPN"
1220   "xscvdpspn %x0,%x1"
1221   [(set_attr "type" "fp")])
1222
1223 ;; Used by direct move to move a SFmode value from GPR to VSX register
1224 (define_insn "vsx_xscvspdpn_directmove"
1225   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1226         (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1227                    UNSPEC_VSX_CVSPDPN))]
1228   "TARGET_XSCVSPDPN"
1229   "xscvspdpn %x0,%x1"
1230   [(set_attr "type" "fp")])
1231
1232 ;; Convert from 64-bit to 32-bit types
1233 ;; Note, favor the Altivec registers since the usual use of these instructions
1234 ;; is in vector converts and we need to use the Altivec vperm instruction.
1235
1236 (define_insn "vsx_xvcvdpsxws"
1237   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1238         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1239                      UNSPEC_VSX_CVDPSXWS))]
1240   "VECTOR_UNIT_VSX_P (V2DFmode)"
1241   "xvcvdpsxws %x0,%x1"
1242   [(set_attr "type" "vecdouble")])
1243
1244 (define_insn "vsx_xvcvdpuxws"
1245   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1246         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1247                      UNSPEC_VSX_CVDPUXWS))]
1248   "VECTOR_UNIT_VSX_P (V2DFmode)"
1249   "xvcvdpuxws %x0,%x1"
1250   [(set_attr "type" "vecdouble")])
1251
1252 (define_insn "vsx_xvcvsxdsp"
1253   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1254         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1255                      UNSPEC_VSX_CVSXDSP))]
1256   "VECTOR_UNIT_VSX_P (V2DFmode)"
1257   "xvcvsxdsp %x0,%x1"
1258   [(set_attr "type" "vecfloat")])
1259
1260 (define_insn "vsx_xvcvuxdsp"
1261   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1262         (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1263                      UNSPEC_VSX_CVUXDSP))]
1264   "VECTOR_UNIT_VSX_P (V2DFmode)"
1265   "xvcvuxwdp %x0,%x1"
1266   [(set_attr "type" "vecdouble")])
1267
1268 ;; Convert from 32-bit to 64-bit types
1269 (define_insn "vsx_xvcvsxwdp"
1270   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1271         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1272                      UNSPEC_VSX_CVSXWDP))]
1273   "VECTOR_UNIT_VSX_P (V2DFmode)"
1274   "xvcvsxwdp %x0,%x1"
1275   [(set_attr "type" "vecdouble")])
1276
1277 (define_insn "vsx_xvcvuxwdp"
1278   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1279         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1280                      UNSPEC_VSX_CVUXWDP))]
1281   "VECTOR_UNIT_VSX_P (V2DFmode)"
1282   "xvcvuxwdp %x0,%x1"
1283   [(set_attr "type" "vecdouble")])
1284
1285 (define_insn "vsx_xvcvspsxds"
1286   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1287         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1288                      UNSPEC_VSX_CVSPSXDS))]
1289   "VECTOR_UNIT_VSX_P (V2DFmode)"
1290   "xvcvspsxds %x0,%x1"
1291   [(set_attr "type" "vecdouble")])
1292
1293 (define_insn "vsx_xvcvspuxds"
1294   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1295         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1296                      UNSPEC_VSX_CVSPUXDS))]
1297   "VECTOR_UNIT_VSX_P (V2DFmode)"
1298   "xvcvspuxds %x0,%x1"
1299   [(set_attr "type" "vecdouble")])
1300
1301 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
1302 ;; since the xsrdpiz instruction does not truncate the value if the floating
1303 ;; point value is < LONG_MIN or > LONG_MAX.
1304 (define_insn "*vsx_float_fix_<mode>2"
1305   [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?wa")
1306         (float:VSX_DF
1307          (fix:<VSI>
1308           (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?wa"))))]
1309   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1310    && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations
1311    && !flag_trapping_math && TARGET_FRIZ"
1312   "x<VSv>r<VSs>iz %x0,%x1"
1313   [(set_attr "type" "<VStype_simple>")
1314    (set_attr "fp_type" "<VSfptype_simple>")])
1315
1316 \f
1317 ;; Permute operations
1318
1319 ;; Build a V2DF/V2DI vector from two scalars
1320 (define_insn "vsx_concat_<mode>"
1321   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa")
1322         (vec_concat:VSX_D
1323          (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
1324          (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))]
1325   "VECTOR_MEM_VSX_P (<MODE>mode)"
1326 {
1327   if (BYTES_BIG_ENDIAN)
1328     return "xxpermdi %x0,%x1,%x2,0";
1329   else
1330     return "xxpermdi %x0,%x2,%x1,0";
1331 }
1332   [(set_attr "type" "vecperm")])
1333
1334 ;; Special purpose concat using xxpermdi to glue two single precision values
1335 ;; together, relying on the fact that internally scalar floats are represented
1336 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
1337 (define_insn "vsx_concat_v2sf"
1338   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1339         (unspec:V2DF
1340          [(match_operand:SF 1 "vsx_register_operand" "f,f")
1341           (match_operand:SF 2 "vsx_register_operand" "f,f")]
1342          UNSPEC_VSX_CONCAT))]
1343   "VECTOR_MEM_VSX_P (V2DFmode)"
1344 {
1345   if (BYTES_BIG_ENDIAN)
1346     return "xxpermdi %x0,%x1,%x2,0";
1347   else
1348     return "xxpermdi %x0,%x2,%x1,0";
1349 }
1350   [(set_attr "type" "vecperm")])
1351
1352 ;; xxpermdi for little endian loads and stores.  We need several of
1353 ;; these since the form of the PARALLEL differs by mode.
1354 (define_insn "*vsx_xxpermdi2_le_<mode>"
1355   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
1356         (vec_select:VSX_D
1357           (match_operand:VSX_D 1 "vsx_register_operand" "wa")
1358           (parallel [(const_int 1) (const_int 0)])))]
1359   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1360   "xxpermdi %x0,%x1,%x1,2"
1361   [(set_attr "type" "vecperm")])
1362
1363 (define_insn "*vsx_xxpermdi4_le_<mode>"
1364   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
1365         (vec_select:VSX_W
1366           (match_operand:VSX_W 1 "vsx_register_operand" "wa")
1367           (parallel [(const_int 2) (const_int 3)
1368                      (const_int 0) (const_int 1)])))]
1369   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1370   "xxpermdi %x0,%x1,%x1,2"
1371   [(set_attr "type" "vecperm")])
1372
1373 (define_insn "*vsx_xxpermdi8_le_V8HI"
1374   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1375         (vec_select:V8HI
1376           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1377           (parallel [(const_int 4) (const_int 5)
1378                      (const_int 6) (const_int 7)
1379                      (const_int 0) (const_int 1)
1380                      (const_int 2) (const_int 3)])))]
1381   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1382   "xxpermdi %x0,%x1,%x1,2"
1383   [(set_attr "type" "vecperm")])
1384
1385 (define_insn "*vsx_xxpermdi16_le_V16QI"
1386   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1387         (vec_select:V16QI
1388           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1389           (parallel [(const_int 8) (const_int 9)
1390                      (const_int 10) (const_int 11)
1391                      (const_int 12) (const_int 13)
1392                      (const_int 14) (const_int 15)
1393                      (const_int 0) (const_int 1)
1394                      (const_int 2) (const_int 3)
1395                      (const_int 4) (const_int 5)
1396                      (const_int 6) (const_int 7)])))]
1397   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1398   "xxpermdi %x0,%x1,%x1,2"
1399   [(set_attr "type" "vecperm")])
1400
1401 ;; lxvd2x for little endian loads.  We need several of
1402 ;; these since the form of the PARALLEL differs by mode.
1403 (define_insn "*vsx_lxvd2x2_le_<mode>"
1404   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
1405         (vec_select:VSX_D
1406           (match_operand:VSX_D 1 "memory_operand" "Z")
1407           (parallel [(const_int 1) (const_int 0)])))]
1408   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1409   "lxvd2x %x0,%y1"
1410   [(set_attr "type" "vecload")])
1411
1412 (define_insn "*vsx_lxvd2x4_le_<mode>"
1413   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
1414         (vec_select:VSX_W
1415           (match_operand:VSX_W 1 "memory_operand" "Z")
1416           (parallel [(const_int 2) (const_int 3)
1417                      (const_int 0) (const_int 1)])))]
1418   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1419   "lxvd2x %x0,%y1"
1420   [(set_attr "type" "vecload")])
1421
1422 (define_insn "*vsx_lxvd2x8_le_V8HI"
1423   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1424         (vec_select:V8HI
1425           (match_operand:V8HI 1 "memory_operand" "Z")
1426           (parallel [(const_int 4) (const_int 5)
1427                      (const_int 6) (const_int 7)
1428                      (const_int 0) (const_int 1)
1429                      (const_int 2) (const_int 3)])))]
1430   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1431   "lxvd2x %x0,%y1"
1432   [(set_attr "type" "vecload")])
1433
1434 (define_insn "*vsx_lxvd2x16_le_V16QI"
1435   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1436         (vec_select:V16QI
1437           (match_operand:V16QI 1 "memory_operand" "Z")
1438           (parallel [(const_int 8) (const_int 9)
1439                      (const_int 10) (const_int 11)
1440                      (const_int 12) (const_int 13)
1441                      (const_int 14) (const_int 15)
1442                      (const_int 0) (const_int 1)
1443                      (const_int 2) (const_int 3)
1444                      (const_int 4) (const_int 5)
1445                      (const_int 6) (const_int 7)])))]
1446   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1447   "lxvd2x %x0,%y1"
1448   [(set_attr "type" "vecload")])
1449
1450 ;; stxvd2x for little endian stores.  We need several of
1451 ;; these since the form of the PARALLEL differs by mode.
1452 (define_insn "*vsx_stxvd2x2_le_<mode>"
1453   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
1454         (vec_select:VSX_D
1455           (match_operand:VSX_D 1 "vsx_register_operand" "wa")
1456           (parallel [(const_int 1) (const_int 0)])))]
1457   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1458   "stxvd2x %x1,%y0"
1459   [(set_attr "type" "vecstore")])
1460
1461 (define_insn "*vsx_stxvd2x4_le_<mode>"
1462   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
1463         (vec_select:VSX_W
1464           (match_operand:VSX_W 1 "vsx_register_operand" "wa")
1465           (parallel [(const_int 2) (const_int 3)
1466                      (const_int 0) (const_int 1)])))]
1467   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1468   "stxvd2x %x1,%y0"
1469   [(set_attr "type" "vecstore")])
1470
1471 (define_insn "*vsx_stxvd2x8_le_V8HI"
1472   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1473         (vec_select:V8HI
1474           (match_operand:V8HI 1 "vsx_register_operand" "wa")
1475           (parallel [(const_int 4) (const_int 5)
1476                      (const_int 6) (const_int 7)
1477                      (const_int 0) (const_int 1)
1478                      (const_int 2) (const_int 3)])))]
1479   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1480   "stxvd2x %x1,%y0"
1481   [(set_attr "type" "vecstore")])
1482
1483 (define_insn "*vsx_stxvd2x16_le_V16QI"
1484   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1485         (vec_select:V16QI
1486           (match_operand:V16QI 1 "vsx_register_operand" "wa")
1487           (parallel [(const_int 8) (const_int 9)
1488                      (const_int 10) (const_int 11)
1489                      (const_int 12) (const_int 13)
1490                      (const_int 14) (const_int 15)
1491                      (const_int 0) (const_int 1)
1492                      (const_int 2) (const_int 3)
1493                      (const_int 4) (const_int 5)
1494                      (const_int 6) (const_int 7)])))]
1495   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1496   "stxvd2x %x1,%y0"
1497   [(set_attr "type" "vecstore")])
1498
1499 ;; Convert a TImode value into V1TImode
1500 (define_expand "vsx_set_v1ti"
1501   [(match_operand:V1TI 0 "nonimmediate_operand" "")
1502    (match_operand:V1TI 1 "nonimmediate_operand" "")
1503    (match_operand:TI 2 "input_operand" "")
1504    (match_operand:QI 3 "u5bit_cint_operand" "")]
1505   "VECTOR_MEM_VSX_P (V1TImode)"
1506 {
1507   if (operands[3] != const0_rtx)
1508     gcc_unreachable ();
1509
1510   emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
1511   DONE;
1512 })
1513
1514 ;; Set the element of a V2DI/VD2F mode
1515 (define_insn "vsx_set_<mode>"
1516   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
1517         (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wd,wa")
1518                        (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")
1519                        (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
1520                       UNSPEC_VSX_SET))]
1521   "VECTOR_MEM_VSX_P (<MODE>mode)"
1522 {
1523   int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
1524   if (INTVAL (operands[3]) == idx_first)
1525     return \"xxpermdi %x0,%x2,%x1,1\";
1526   else if (INTVAL (operands[3]) == 1 - idx_first)
1527     return \"xxpermdi %x0,%x1,%x2,0\";
1528   else
1529     gcc_unreachable ();
1530 }
1531   [(set_attr "type" "vecperm")])
1532
1533 ;; Extract a DF/DI element from V2DF/V2DI
1534 (define_expand "vsx_extract_<mode>"
1535   [(set (match_operand:<VS_scalar> 0 "register_operand" "")
1536         (vec_select:<VS_scalar> (match_operand:VSX_D 1 "register_operand" "")
1537                        (parallel
1538                         [(match_operand:QI 2 "u5bit_cint_operand" "")])))]
1539   "VECTOR_MEM_VSX_P (<MODE>mode)"
1540   "")
1541
1542 ;; Optimize cases were we can do a simple or direct move.
1543 ;; Or see if we can avoid doing the move at all
1544 (define_insn "*vsx_extract_<mode>_internal1"
1545   [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,ws,?wa,r")
1546         (vec_select:<VS_scalar>
1547          (match_operand:VSX_D 1 "register_operand" "d,wd,wa,wm")
1548          (parallel
1549           [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD,wD")])))]
1550   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
1551 {
1552   int op0_regno = REGNO (operands[0]);
1553   int op1_regno = REGNO (operands[1]);
1554
1555   if (op0_regno == op1_regno)
1556     return "nop";
1557
1558   if (INT_REGNO_P (op0_regno))
1559     return "mfvsrd %0,%x1";
1560
1561   if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
1562     return "fmr %0,%1";
1563
1564   return "xxlor %x0,%x1,%x1";
1565 }
1566   [(set_attr "type" "fp,vecsimple,vecsimple,mftgpr")
1567    (set_attr "length" "4")])
1568
1569 (define_insn "*vsx_extract_<mode>_internal2"
1570   [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d,ws,ws,?wa")
1571         (vec_select:<VS_scalar>
1572          (match_operand:VSX_D 1 "vsx_register_operand" "d,wd,wd,wa")
1573          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "wD,wD,i,i")])))]
1574   "VECTOR_MEM_VSX_P (<MODE>mode)
1575    && (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE
1576        || INTVAL (operands[2]) != VECTOR_ELEMENT_SCALAR_64BIT)"
1577 {
1578   int fldDM;
1579   gcc_assert (UINTVAL (operands[2]) <= 1);
1580
1581   if (INTVAL (operands[2]) == VECTOR_ELEMENT_SCALAR_64BIT)
1582     {
1583       int op0_regno = REGNO (operands[0]);
1584       int op1_regno = REGNO (operands[1]);
1585
1586       if (op0_regno == op1_regno)
1587         return "nop";
1588
1589       if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
1590         return "fmr %0,%1";
1591
1592       return "xxlor %x0,%x1,%x1";
1593     }
1594
1595   fldDM = INTVAL (operands[2]) << 1;
1596   if (!BYTES_BIG_ENDIAN)
1597     fldDM = 3 - fldDM;
1598   operands[3] = GEN_INT (fldDM);
1599   return "xxpermdi %x0,%x1,%x1,%3";
1600 }
1601   [(set_attr "type" "fp,vecsimple,vecperm,vecperm")
1602    (set_attr "length" "4")])
1603
1604 ;; Optimize extracting a single scalar element from memory if the scalar is in
1605 ;; the correct location to use a single load.
1606 (define_insn "*vsx_extract_<mode>_load"
1607   [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
1608         (vec_select:<VS_scalar>
1609          (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
1610          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
1611   "VECTOR_MEM_VSX_P (<MODE>mode)"
1612   "@
1613    lfd%U1%X1 %0,%1
1614    lxsd%U1x %x0,%y1
1615    ld%U1%X1 %0,%1"
1616   [(set_attr_alternative "type"
1617       [(if_then_else
1618          (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
1619          (const_string "fpload_ux")
1620          (if_then_else
1621            (match_test "update_address_mem (operands[1], VOIDmode)")
1622            (const_string "fpload_u")
1623            (const_string "fpload")))
1624        (const_string "fpload")
1625        (if_then_else
1626          (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
1627          (const_string "load_ux")
1628          (if_then_else
1629            (match_test "update_address_mem (operands[1], VOIDmode)")
1630            (const_string "load_u")
1631            (const_string "load")))])
1632    (set_attr "length" "4")])
1633
1634 ;; Optimize storing a single scalar element that is the right location to
1635 ;; memory
1636 (define_insn "*vsx_extract_<mode>_store"
1637   [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
1638         (vec_select:<VS_scalar>
1639          (match_operand:VSX_D 1 "register_operand" "d,wd,wa")
1640          (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
1641   "VECTOR_MEM_VSX_P (<MODE>mode)"
1642   "@
1643    stfd%U0%X0 %1,%0
1644    stxsd%U0x %x1,%y0
1645    stxsd%U0x %x1,%y0"
1646   [(set_attr_alternative "type"
1647       [(if_then_else
1648          (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
1649          (const_string "fpstore_ux")
1650          (if_then_else
1651            (match_test "update_address_mem (operands[0], VOIDmode)")
1652            (const_string "fpstore_u")
1653            (const_string "fpstore")))
1654        (const_string "fpstore")
1655        (const_string "fpstore")])
1656    (set_attr "length" "4")])
1657
1658 ;; Extract a SF element from V4SF
1659 (define_insn_and_split "vsx_extract_v4sf"
1660   [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
1661         (vec_select:SF
1662          (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
1663          (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
1664    (clobber (match_scratch:V4SF 3 "=X,0"))]
1665   "VECTOR_UNIT_VSX_P (V4SFmode)"
1666   "@
1667    xscvspdp %x0,%x1
1668    #"
1669   ""
1670   [(const_int 0)]
1671   "
1672 {
1673   rtx op0 = operands[0];
1674   rtx op1 = operands[1];
1675   rtx op2 = operands[2];
1676   rtx op3 = operands[3];
1677   rtx tmp;
1678   HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
1679
1680   if (ele == 0)
1681     tmp = op1;
1682   else
1683     {
1684       if (GET_CODE (op3) == SCRATCH)
1685         op3 = gen_reg_rtx (V4SFmode);
1686       emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2));
1687       tmp = op3;
1688     }
1689   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
1690   DONE;
1691 }"
1692   [(set_attr "length" "4,8")
1693    (set_attr "type" "fp")])
1694
1695 ;; Expand the builtin form of xxpermdi to canonical rtl.
1696 (define_expand "vsx_xxpermdi_<mode>"
1697   [(match_operand:VSX_L 0 "vsx_register_operand" "")
1698    (match_operand:VSX_L 1 "vsx_register_operand" "")
1699    (match_operand:VSX_L 2 "vsx_register_operand" "")
1700    (match_operand:QI 3 "u5bit_cint_operand" "")]
1701   "VECTOR_MEM_VSX_P (<MODE>mode)"
1702 {
1703   rtx target = operands[0];
1704   rtx op0 = operands[1];
1705   rtx op1 = operands[2];
1706   int mask = INTVAL (operands[3]);
1707   rtx perm0 = GEN_INT ((mask >> 1) & 1);
1708   rtx perm1 = GEN_INT ((mask & 1) + 2);
1709   rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
1710
1711   if (<MODE>mode == V2DFmode)
1712     gen = gen_vsx_xxpermdi2_v2df_1;
1713   else
1714     {
1715       gen = gen_vsx_xxpermdi2_v2di_1;
1716       if (<MODE>mode != V2DImode)
1717         {
1718           target = gen_lowpart (V2DImode, target);
1719           op0 = gen_lowpart (V2DImode, op0);
1720           op1 = gen_lowpart (V2DImode, op1);
1721         }
1722     }
1723   /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
1724      transformation we don't want; it is necessary for
1725      rs6000_expand_vec_perm_const_1 but not for this use.  So we
1726      prepare for that by reversing the transformation here.  */
1727   if (BYTES_BIG_ENDIAN)
1728     emit_insn (gen (target, op0, op1, perm0, perm1));
1729   else
1730     {
1731       rtx p0 = GEN_INT (3 - INTVAL (perm1));
1732       rtx p1 = GEN_INT (3 - INTVAL (perm0));
1733       emit_insn (gen (target, op1, op0, p0, p1));
1734     }
1735   DONE;
1736 })
1737
1738 (define_insn "vsx_xxpermdi2_<mode>_1"
1739   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
1740         (vec_select:VSX_D
1741           (vec_concat:<VS_double>
1742             (match_operand:VSX_D 1 "vsx_register_operand" "wd")
1743             (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
1744           (parallel [(match_operand 3 "const_0_to_1_operand" "")
1745                      (match_operand 4 "const_2_to_3_operand" "")])))]
1746   "VECTOR_MEM_VSX_P (<MODE>mode)"
1747 {
1748   int op3, op4, mask;
1749
1750   /* For little endian, swap operands and invert/swap selectors
1751      to get the correct xxpermdi.  The operand swap sets up the
1752      inputs as a little endian array.  The selectors are swapped
1753      because they are defined to use big endian ordering.  The
1754      selectors are inverted to get the correct doublewords for
1755      little endian ordering.  */
1756   if (BYTES_BIG_ENDIAN)
1757     {
1758       op3 = INTVAL (operands[3]);
1759       op4 = INTVAL (operands[4]);
1760     }
1761   else
1762     {
1763       op3 = 3 - INTVAL (operands[4]);
1764       op4 = 3 - INTVAL (operands[3]);
1765     }
1766
1767   mask = (op3 << 1) | (op4 - 2);
1768   operands[3] = GEN_INT (mask);
1769
1770   if (BYTES_BIG_ENDIAN)
1771     return "xxpermdi %x0,%x1,%x2,%3";
1772   else
1773     return "xxpermdi %x0,%x2,%x1,%3";
1774 }
1775   [(set_attr "type" "vecperm")])
1776
1777 (define_expand "vec_perm_const<mode>"
1778   [(match_operand:VSX_D 0 "vsx_register_operand" "")
1779    (match_operand:VSX_D 1 "vsx_register_operand" "")
1780    (match_operand:VSX_D 2 "vsx_register_operand" "")
1781    (match_operand:V2DI  3 "" "")]
1782   "VECTOR_MEM_VSX_P (<MODE>mode)"
1783 {
1784   if (rs6000_expand_vec_perm_const (operands))
1785     DONE;
1786   else
1787     FAIL;
1788 })
1789
1790 ;; Expanders for builtins
1791 (define_expand "vsx_mergel_<mode>"
1792   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
1793    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
1794    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
1795   "VECTOR_MEM_VSX_P (<MODE>mode)"
1796 {
1797   rtvec v;
1798   rtx x;
1799
1800   /* Special handling for LE with -maltivec=be.  */
1801   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
1802     {
1803       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
1804       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
1805     }
1806   else
1807     {
1808       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
1809       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
1810     }
1811
1812   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
1813   emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1814   DONE;
1815 })
1816
1817 (define_expand "vsx_mergeh_<mode>"
1818   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
1819    (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
1820    (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
1821   "VECTOR_MEM_VSX_P (<MODE>mode)"
1822 {
1823   rtvec v;
1824   rtx x;
1825
1826   /* Special handling for LE with -maltivec=be.  */
1827   if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
1828     {
1829       v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
1830       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
1831     }
1832   else
1833     {
1834       v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
1835       x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
1836     }
1837
1838   x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
1839   emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1840   DONE;
1841 })
1842
1843 ;; V2DF/V2DI splat
1844 (define_insn "vsx_splat_<mode>"
1845   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
1846         (vec_duplicate:VSX_D
1847          (match_operand:<VS_scalar> 1 "splat_input_operand" "ws,f,Z,wa,wa,Z")))]
1848   "VECTOR_MEM_VSX_P (<MODE>mode)"
1849   "@
1850    xxpermdi %x0,%x1,%x1,0
1851    xxpermdi %x0,%x1,%x1,0
1852    lxvdsx %x0,%y1
1853    xxpermdi %x0,%x1,%x1,0
1854    xxpermdi %x0,%x1,%x1,0
1855    lxvdsx %x0,%y1"
1856   [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
1857
1858 ;; V4SF/V4SI splat
1859 (define_insn "vsx_xxspltw_<mode>"
1860   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1861         (vec_duplicate:VSX_W
1862          (vec_select:<VS_scalar>
1863           (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1864           (parallel
1865            [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
1866   "VECTOR_MEM_VSX_P (<MODE>mode)"
1867 {
1868   if (!BYTES_BIG_ENDIAN)
1869     operands[2] = GEN_INT (3 - INTVAL (operands[2]));
1870
1871   return "xxspltw %x0,%x1,%2";
1872 }
1873   [(set_attr "type" "vecperm")])
1874
1875 (define_insn "vsx_xxspltw_<mode>_direct"
1876   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1877         (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1878                        (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
1879                       UNSPEC_VSX_XXSPLTW))]
1880   "VECTOR_MEM_VSX_P (<MODE>mode)"
1881   "xxspltw %x0,%x1,%2"
1882   [(set_attr "type" "vecperm")])
1883
1884 ;; V4SF/V4SI interleave
1885 (define_insn "vsx_xxmrghw_<mode>"
1886   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1887         (vec_select:VSX_W
1888           (vec_concat:<VS_double>
1889             (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1890             (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa"))
1891           (parallel [(const_int 0) (const_int 4)
1892                      (const_int 1) (const_int 5)])))]
1893   "VECTOR_MEM_VSX_P (<MODE>mode)"
1894   "xxmrghw %x0,%x1,%x2"
1895   [(set_attr "type" "vecperm")])
1896
1897 (define_insn "vsx_xxmrglw_<mode>"
1898   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1899         (vec_select:VSX_W
1900           (vec_concat:<VS_double>
1901             (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1902             (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa"))
1903           (parallel [(const_int 2) (const_int 6)
1904                      (const_int 3) (const_int 7)])))]
1905   "VECTOR_MEM_VSX_P (<MODE>mode)"
1906   "xxmrglw %x0,%x1,%x2"
1907   [(set_attr "type" "vecperm")])
1908
1909 ;; Shift left double by word immediate
1910 (define_insn "vsx_xxsldwi_<mode>"
1911   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
1912         (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
1913                        (match_operand:VSX_L 2 "vsx_register_operand" "wa")
1914                        (match_operand:QI 3 "u5bit_cint_operand" "i")]
1915                       UNSPEC_VSX_SLDWI))]
1916   "VECTOR_MEM_VSX_P (<MODE>mode)"
1917   "xxsldwi %x0,%x1,%x2,%3"
1918   [(set_attr "type" "vecperm")])
1919
1920 \f
1921 ;; Vector reduction insns and splitters
1922
1923 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
1924   [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
1925         (VEC_reduc:V2DF
1926          (vec_concat:V2DF
1927           (vec_select:DF
1928            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
1929            (parallel [(const_int 1)]))
1930           (vec_select:DF
1931            (match_dup 1)
1932            (parallel [(const_int 0)])))
1933          (match_dup 1)))
1934    (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
1935   "VECTOR_UNIT_VSX_P (V2DFmode)"
1936   "#"
1937   ""
1938   [(const_int 0)]
1939   "
1940 {
1941   rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
1942              ? gen_reg_rtx (V2DFmode)
1943              : operands[2];
1944   emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
1945   emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
1946   DONE;
1947 }"
1948   [(set_attr "length" "8")
1949    (set_attr "type" "veccomplex")])
1950
1951 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
1952   [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
1953         (VEC_reduc:V4SF
1954          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
1955          (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
1956    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
1957    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
1958   "VECTOR_UNIT_VSX_P (V4SFmode)"
1959   "#"
1960   ""
1961   [(const_int 0)]
1962   "
1963 {
1964   rtx op0 = operands[0];
1965   rtx op1 = operands[1];
1966   rtx tmp2, tmp3, tmp4;
1967
1968   if (can_create_pseudo_p ())
1969     {
1970       tmp2 = gen_reg_rtx (V4SFmode);
1971       tmp3 = gen_reg_rtx (V4SFmode);
1972       tmp4 = gen_reg_rtx (V4SFmode);
1973     }
1974   else
1975     {
1976       tmp2 = operands[2];
1977       tmp3 = operands[3];
1978       tmp4 = tmp2;
1979     }
1980
1981   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
1982   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
1983   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
1984   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
1985   DONE;
1986 }"
1987   [(set_attr "length" "16")
1988    (set_attr "type" "veccomplex")])
1989
1990 ;; Combiner patterns with the vector reduction patterns that knows we can get
1991 ;; to the top element of the V2DF array without doing an extract.
1992
1993 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
1994   [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa")
1995         (vec_select:DF
1996          (VEC_reduc:V2DF
1997           (vec_concat:V2DF
1998            (vec_select:DF
1999             (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2000             (parallel [(const_int 1)]))
2001            (vec_select:DF
2002             (match_dup 1)
2003             (parallel [(const_int 0)])))
2004           (match_dup 1))
2005          (parallel [(const_int 1)])))
2006    (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2007   "VECTOR_UNIT_VSX_P (V2DFmode)"
2008   "#"
2009   ""
2010   [(const_int 0)]
2011   "
2012 {
2013   rtx hi = gen_highpart (DFmode, operands[1]);
2014   rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2015             ? gen_reg_rtx (DFmode)
2016             : operands[2];
2017
2018   emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2019   emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2020   DONE;
2021 }"
2022   [(set_attr "length" "8")
2023    (set_attr "type" "veccomplex")])
2024
2025 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2026   [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2027         (vec_select:SF
2028          (VEC_reduc:V4SF
2029           (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2030           (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2031          (parallel [(const_int 3)])))
2032    (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2033    (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2034    (clobber (match_scratch:V4SF 4 "=0,0"))]
2035   "VECTOR_UNIT_VSX_P (V4SFmode)"
2036   "#"
2037   ""
2038   [(const_int 0)]
2039   "
2040 {
2041   rtx op0 = operands[0];
2042   rtx op1 = operands[1];
2043   rtx tmp2, tmp3, tmp4, tmp5;
2044
2045   if (can_create_pseudo_p ())
2046     {
2047       tmp2 = gen_reg_rtx (V4SFmode);
2048       tmp3 = gen_reg_rtx (V4SFmode);
2049       tmp4 = gen_reg_rtx (V4SFmode);
2050       tmp5 = gen_reg_rtx (V4SFmode);
2051     }
2052   else
2053     {
2054       tmp2 = operands[2];
2055       tmp3 = operands[3];
2056       tmp4 = tmp2;
2057       tmp5 = operands[4];
2058     }
2059
2060   emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2061   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2062   emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2063   emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2064   emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2065   DONE;
2066 }"
2067   [(set_attr "length" "20")
2068    (set_attr "type" "veccomplex")])
2069
2070 \f
2071 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
2072 (define_peephole
2073   [(set (match_operand:P 0 "base_reg_operand" "")
2074         (match_operand:P 1 "short_cint_operand" ""))
2075    (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2076         (mem:VSX_M2 (plus:P (match_dup 0)
2077                             (match_operand:P 3 "int_reg_operand" ""))))]
2078   "TARGET_VSX && TARGET_P8_FUSION"
2079   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2080   [(set_attr "length" "8")
2081    (set_attr "type" "vecload")])
2082
2083 (define_peephole
2084   [(set (match_operand:P 0 "base_reg_operand" "")
2085         (match_operand:P 1 "short_cint_operand" ""))
2086    (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2087         (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
2088                             (match_dup 0))))]
2089   "TARGET_VSX && TARGET_P8_FUSION"
2090   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2091   [(set_attr "length" "8")
2092    (set_attr "type" "vecload")])