gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
  23         (match_operand:VALL_F16MOV 1 "general_operand"))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand")
  43         (match_operand:VALL 1 "general_operand"))]
  44   "TARGET_SIMD && !STRICT_ALIGNMENT"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
 105   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VDMOV 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
 133   [(set (match_operand:VQMOV 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQMOV 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQMOV 0 "register_operand" "")
 238       (match_operand:VQMOV 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQMOV 0 "register_operand" "")
 250         (match_operand:VQMOV 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "@aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQMOV 0)
 262         (match_operand:VQMOV 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285         emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
 286         emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
 287       }
 288     DONE;
 289   }
 290 )
 291
 292 (define_expand "aarch64_get_half<mode>"
 293   [(set (match_operand:<VHALF> 0 "register_operand")
 294         (vec_select:<VHALF>
 295           (match_operand:VQMOV 1 "register_operand")
 296           (match_operand 2 "ascending_int_parallel")))]
 297   "TARGET_SIMD"
 298 )
 299
 300 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
 301   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
 302         (vec_select:<VHALF>
 303           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
 304           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
 305   "TARGET_SIMD"
 306   "@
 307    #
 308    umov\t%0, %1.d[0]"
 309   "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
 310   [(set (match_dup 0) (match_dup 1))]
 311   {
 312     operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
 313   }
 314   [(set_attr "type" "mov_reg,neon_to_gp<q>")
 315    (set_attr "length" "4")]
 316 )
 317
 318 (define_insn "aarch64_simd_mov_from_<mode>high"
 319   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
 320         (vec_select:<VHALF>
 321           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
 322           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
 323   "TARGET_SIMD"
 324   "@
 325    dup\\t%d0, %1.d[1]
 326    umov\t%0, %1.d[1]"
 327   [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
 328    (set_attr "length" "4")]
 329 )
 330
 331 (define_insn "orn<mode>3"
 332  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 333        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 334                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 335  "TARGET_SIMD"
 336  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 337   [(set_attr "type" "neon_logic<q>")]
 338 )
 339
 340 (define_insn "bic<mode>3"
 341  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 342        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 343                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 344  "TARGET_SIMD"
 345  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 346   [(set_attr "type" "neon_logic<q>")]
 347 )
 348
 349 (define_insn "add<mode>3"
 350   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 351         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 352                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 353   "TARGET_SIMD"
 354   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 355   [(set_attr "type" "neon_add<q>")]
 356 )
 357
 358 (define_insn "sub<mode>3"
 359   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 360         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 361                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 362   "TARGET_SIMD"
 363   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 364   [(set_attr "type" "neon_sub<q>")]
 365 )
 366
 367 (define_insn "mul<mode>3"
 368   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 369         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 370                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 371   "TARGET_SIMD"
 372   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 373   [(set_attr "type" "neon_mul_<Vetype><q>")]
 374 )
 375
 376 (define_insn "bswap<mode>2"
 377   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 378         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 379   "TARGET_SIMD"
 380   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 381   [(set_attr "type" "neon_rev<q>")]
 382 )
 383
 384 (define_insn "aarch64_rbit<mode>"
 385   [(set (match_operand:VB 0 "register_operand" "=w")
 386         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 387                    UNSPEC_RBIT))]
 388   "TARGET_SIMD"
 389   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 390   [(set_attr "type" "neon_rbit")]
 391 )
 392
 393 (define_expand "ctz<mode>2"
 394   [(set (match_operand:VS 0 "register_operand")
 395         (ctz:VS (match_operand:VS 1 "register_operand")))]
 396   "TARGET_SIMD"
 397   {
 398      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 399      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 400                                              <MODE>mode, 0);
 401      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 402      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 403      DONE;
 404   }
 405 )
 406
 407 (define_expand "xorsign<mode>3"
 408   [(match_operand:VHSDF 0 "register_operand")
 409    (match_operand:VHSDF 1 "register_operand")
 410    (match_operand:VHSDF 2 "register_operand")]
 411   "TARGET_SIMD"
 412 {
 413
 414   machine_mode imode = <V_INT_EQUIV>mode;
 415   rtx v_bitmask = gen_reg_rtx (imode);
 416   rtx op1x = gen_reg_rtx (imode);
 417   rtx op2x = gen_reg_rtx (imode);
 418
 419   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 420   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 421
 422   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 423
 424   emit_move_insn (v_bitmask,
 425                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 426                                                      HOST_WIDE_INT_M1U << bits));
 427
 428   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 429   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 430   emit_move_insn (operands[0],
 431                   lowpart_subreg (<MODE>mode, op1x, imode));
 432   DONE;
 433 }
 434 )
 435
 436 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 437 ;; fact that their usage need to guarantee that the source vectors are
 438 ;; contiguous.  It would be wrong to describe the operation without being able
 439 ;; to describe the permute that is also required, but even if that is done
 440 ;; the permute would have been created as a LOAD_LANES which means the values
 441 ;; in the registers are in the wrong order.
 442 (define_insn "aarch64_fcadd<rot><mode>"
 443   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 444         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 445                        (match_operand:VHSDF 2 "register_operand" "w")]
 446                        FCADD))]
 447   "TARGET_COMPLEX"
 448   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 449   [(set_attr "type" "neon_fcadd")]
 450 )
 451
 452 (define_insn "aarch64_fcmla<rot><mode>"
 453   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 454         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 455                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 456                                    (match_operand:VHSDF 3 "register_operand" "w")]
 457                                    FCMLA)))]
 458   "TARGET_COMPLEX"
 459   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 460   [(set_attr "type" "neon_fcmla")]
 461 )
 462
 463
 464 (define_insn "aarch64_fcmla_lane<rot><mode>"
 465   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 466         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 467                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 468                                    (match_operand:VHSDF 3 "register_operand" "w")
 469                                    (match_operand:SI 4 "const_int_operand" "n")]
 470                                    FCMLA)))]
 471   "TARGET_COMPLEX"
 472 {
 473   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 474   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 475 }
 476   [(set_attr "type" "neon_fcmla")]
 477 )
 478
 479 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
 480   [(set (match_operand:V4HF 0 "register_operand" "=w")
 481         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
 482                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 483                                  (match_operand:V8HF 3 "register_operand" "w")
 484                                  (match_operand:SI 4 "const_int_operand" "n")]
 485                                  FCMLA)))]
 486   "TARGET_COMPLEX"
 487 {
 488   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 489   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 490 }
 491   [(set_attr "type" "neon_fcmla")]
 492 )
 493
 494 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 495   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 496         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
 497                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 498                                      (match_operand:<VHALF> 3 "register_operand" "w")
 499                                      (match_operand:SI 4 "const_int_operand" "n")]
 500                                      FCMLA)))]
 501   "TARGET_COMPLEX"
 502 {
 503   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 504   operands[4]
 505     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 506   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 507 }
 508   [(set_attr "type" "neon_fcmla")]
 509 )
 510
 511 ;; These instructions map to the __builtins for the Dot Product operations.
 512 (define_insn "aarch64_<sur>dot<vsi2qi>"
 513   [(set (match_operand:VS 0 "register_operand" "=w")
 514         (plus:VS (match_operand:VS 1 "register_operand" "0")
 515                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 516                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 517                 DOTPROD)))]
 518   "TARGET_DOTPROD"
 519   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 520   [(set_attr "type" "neon_dot<q>")]
 521 )
 522
 523 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
 524 ;; (vector) Dot Product operation.
 525 (define_insn "aarch64_usdot<vsi2qi>"
 526   [(set (match_operand:VS 0 "register_operand" "=w")
 527         (plus:VS
 528           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 529                       (match_operand:<VSI2QI> 3 "register_operand" "w")]
 530           UNSPEC_USDOT)
 531           (match_operand:VS 1 "register_operand" "0")))]
 532   "TARGET_I8MM"
 533   "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 534   [(set_attr "type" "neon_dot<q>")]
 535 )
 536
 537 ;; These expands map to the Dot Product optab the vectorizer checks for.
 538 ;; The auto-vectorizer expects a dot product builtin that also does an
 539 ;; accumulation into the provided register.
 540 ;; Given the following pattern
 541 ;;
 542 ;; for (i=0; i<len; i++) {
 543 ;;     c = a[i] * b[i];
 544 ;;     r += c;
 545 ;; }
 546 ;; return result;
 547 ;;
 548 ;; This can be auto-vectorized to
 549 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 550 ;;
 551 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 552 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 553 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 554 ;; ...
 555 ;;
 556 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 557 (define_expand "<sur>dot_prod<vsi2qi>"
 558   [(set (match_operand:VS 0 "register_operand")
 559         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 560                             (match_operand:<VSI2QI> 2 "register_operand")]
 561                  DOTPROD)
 562                 (match_operand:VS 3 "register_operand")))]
 563   "TARGET_DOTPROD"
 564 {
 565   emit_insn (
 566     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 567                                     operands[2]));
 568   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 569   DONE;
 570 })
 571
 572 ;; These instructions map to the __builtins for the Dot Product
 573 ;; indexed operations.
 574 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 575   [(set (match_operand:VS 0 "register_operand" "=w")
 576         (plus:VS (match_operand:VS 1 "register_operand" "0")
 577                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 578                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 579                             (match_operand:SI 4 "immediate_operand" "i")]
 580                 DOTPROD)))]
 581   "TARGET_DOTPROD"
 582   {
 583     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 584     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 585   }
 586   [(set_attr "type" "neon_dot<q>")]
 587 )
 588
 589 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 590   [(set (match_operand:VS 0 "register_operand" "=w")
 591         (plus:VS (match_operand:VS 1 "register_operand" "0")
 592                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 593                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 594                             (match_operand:SI 4 "immediate_operand" "i")]
 595                 DOTPROD)))]
 596   "TARGET_DOTPROD"
 597   {
 598     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 599     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 600   }
 601   [(set_attr "type" "neon_dot<q>")]
 602 )
 603
 604 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
 605 ;; (by element) Dot Product operations.
 606 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
 607   [(set (match_operand:VS 0 "register_operand" "=w")
 608         (plus:VS
 609           (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
 610                       (match_operand:VB 3 "register_operand" "w")
 611                       (match_operand:SI 4 "immediate_operand" "i")]
 612           DOTPROD_I8MM)
 613           (match_operand:VS 1 "register_operand" "0")))]
 614   "TARGET_I8MM"
 615   {
 616     int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
 617     int lane = INTVAL (operands[4]);
 618     operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
 619     return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
 620   }
 621   [(set_attr "type" "neon_dot<VS:q>")]
 622 )
 623
 624 (define_expand "copysign<mode>3"
 625   [(match_operand:VHSDF 0 "register_operand")
 626    (match_operand:VHSDF 1 "register_operand")
 627    (match_operand:VHSDF 2 "register_operand")]
 628   "TARGET_FLOAT && TARGET_SIMD"
 629 {
 630   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 631   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 632
 633   emit_move_insn (v_bitmask,
 634                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 635                                                      HOST_WIDE_INT_M1U << bits));
 636   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 637                                          operands[2], operands[1]));
 638   DONE;
 639 }
 640 )
 641
 642 (define_insn "*aarch64_mul3_elt<mode>"
 643  [(set (match_operand:VMUL 0 "register_operand" "=w")
 644     (mult:VMUL
 645       (vec_duplicate:VMUL
 646           (vec_select:<VEL>
 647             (match_operand:VMUL 1 "register_operand" "<h_con>")
 648             (parallel [(match_operand:SI 2 "immediate_operand")])))
 649       (match_operand:VMUL 3 "register_operand" "w")))]
 650   "TARGET_SIMD"
 651   {
 652     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 653     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 654   }
 655   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 656 )
 657
 658 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 659   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 660      (mult:VMUL_CHANGE_NLANES
 661        (vec_duplicate:VMUL_CHANGE_NLANES
 662           (vec_select:<VEL>
 663             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 664             (parallel [(match_operand:SI 2 "immediate_operand")])))
 665       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 666   "TARGET_SIMD"
 667   {
 668     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 669     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 670   }
 671   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 672 )
 673
 674 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 675  [(set (match_operand:VMUL 0 "register_operand" "=w")
 676     (mult:VMUL
 677       (vec_duplicate:VMUL
 678             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 679       (match_operand:VMUL 2 "register_operand" "w")))]
 680   "TARGET_SIMD"
 681   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 682   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 683 )
 684
 685 (define_insn "@aarch64_rsqrte<mode>"
 686   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 687         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 688                      UNSPEC_RSQRTE))]
 689   "TARGET_SIMD"
 690   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 691   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 692
 693 (define_insn "@aarch64_rsqrts<mode>"
 694   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 695         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 696                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 697          UNSPEC_RSQRTS))]
 698   "TARGET_SIMD"
 699   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 700   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 701
 702 (define_expand "rsqrt<mode>2"
 703   [(set (match_operand:VALLF 0 "register_operand")
 704         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
 705                      UNSPEC_RSQRT))]
 706   "TARGET_SIMD"
 707 {
 708   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 709   DONE;
 710 })
 711
 712 (define_insn "*aarch64_mul3_elt_to_64v2df"
 713   [(set (match_operand:DF 0 "register_operand" "=w")
 714      (mult:DF
 715        (vec_select:DF
 716          (match_operand:V2DF 1 "register_operand" "w")
 717          (parallel [(match_operand:SI 2 "immediate_operand")]))
 718        (match_operand:DF 3 "register_operand" "w")))]
 719   "TARGET_SIMD"
 720   {
 721     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 722     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 723   }
 724   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 725 )
 726
 727 (define_insn "neg<mode>2"
 728   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 729         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 730   "TARGET_SIMD"
 731   "neg\t%0.<Vtype>, %1.<Vtype>"
 732   [(set_attr "type" "neon_neg<q>")]
 733 )
 734
 735 (define_insn "abs<mode>2"
 736   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 737         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 738   "TARGET_SIMD"
 739   "abs\t%0.<Vtype>, %1.<Vtype>"
 740   [(set_attr "type" "neon_abs<q>")]
 741 )
 742
 743 ;; The intrinsic version of integer ABS must not be allowed to
 744 ;; combine with any operation with an integerated ABS step, such
 745 ;; as SABD.
 746 (define_insn "aarch64_abs<mode>"
 747   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 748           (unspec:VSDQ_I_DI
 749             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 750            UNSPEC_ABS))]
 751   "TARGET_SIMD"
 752   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 753   [(set_attr "type" "neon_abs<q>")]
 754 )
 755
 756 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 757 ;; This isn't accurate as ABS treats always its input as a signed value.
 758 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 759 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 760 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 761 (define_insn "aarch64_<su>abd<mode>_3"
 762   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 763         (minus:VDQ_BHSI
 764           (USMAX:VDQ_BHSI
 765             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 766             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 767           (<max_opp>:VDQ_BHSI
 768             (match_dup 1)
 769             (match_dup 2))))]
 770   "TARGET_SIMD"
 771   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 772   [(set_attr "type" "neon_abd<q>")]
 773 )
 774
 775 (define_insn "aarch64_<sur>abdl2<mode>_3"
 776   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 777         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 778                           (match_operand:VDQV_S 2 "register_operand" "w")]
 779         ABDL2))]
 780   "TARGET_SIMD"
 781   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 782   [(set_attr "type" "neon_abd<q>")]
 783 )
 784
 785 (define_insn "aarch64_<sur>abal<mode>_4"
 786   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 787         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 788                           (match_operand:VDQV_S 2 "register_operand" "w")
 789                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 790         ABAL))]
 791   "TARGET_SIMD"
 792   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 793   [(set_attr "type" "neon_arith_acc<q>")]
 794 )
 795
 796 (define_insn "aarch64_<sur>adalp<mode>_3"
 797   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 798         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 799                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 800         ADALP))]
 801   "TARGET_SIMD"
 802   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 803   [(set_attr "type" "neon_reduc_add<q>")]
 804 )
 805
 806 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 807 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 808 ;; reduction of the difference into a V4SI vector and accumulate that into
 809 ;; operand 3 before copying that into the result operand 0.
 810 ;; Perform that with a sequence of:
 811 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 812 ;; UABAL        tmp.8h, op1.16b, op2.16b
 813 ;; UADALP       op3.4s, tmp.8h
 814 ;; MOV          op0, op3 // should be eliminated in later passes.
 815 ;;
 816 ;; For TARGET_DOTPROD we do:
 817 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
 818 ;; UABD tmp2.16b, op1.16b, op2.16b
 819 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
 820 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
 821 ;;
 822 ;; The signed version just uses the signed variants of the above instructions
 823 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
 824 ;; unsigned.
 825
 826 (define_expand "<sur>sadv16qi"
 827   [(use (match_operand:V4SI 0 "register_operand"))
 828    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 829                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 830    (use (match_operand:V4SI 3 "register_operand"))]
 831   "TARGET_SIMD"
 832   {
 833     if (TARGET_DOTPROD)
 834       {
 835         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
 836         rtx abd = gen_reg_rtx (V16QImode);
 837         emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
 838         emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
 839                                           abd, ones));
 840         DONE;
 841       }
 842     rtx reduc = gen_reg_rtx (V8HImode);
 843     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 844                                                operands[2]));
 845     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 846                                               operands[2], reduc));
 847     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 848                                               operands[3]));
 849     emit_move_insn (operands[0], operands[3]);
 850     DONE;
 851   }
 852 )
 853
 854 (define_insn "aba<mode>_3"
 855   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 856         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 857                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 858                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 859                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 860   "TARGET_SIMD"
 861   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 862   [(set_attr "type" "neon_arith_acc<q>")]
 863 )
 864
 865 (define_insn "fabd<mode>3"
 866   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 867         (abs:VHSDF_HSDF
 868           (minus:VHSDF_HSDF
 869             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 870             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 871   "TARGET_SIMD"
 872   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 873   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 874 )
 875
 876 ;; For AND (vector, register) and BIC (vector, immediate)
 877 (define_insn "and<mode>3"
 878   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 879         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 880                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 881   "TARGET_SIMD"
 882   {
 883     switch (which_alternative)
 884       {
 885       case 0:
 886         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 887       case 1:
 888         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 889                                                   AARCH64_CHECK_BIC);
 890       default:
 891         gcc_unreachable ();
 892       }
 893   }
 894   [(set_attr "type" "neon_logic<q>")]
 895 )
 896
 897 ;; For ORR (vector, register) and ORR (vector, immediate)
 898 (define_insn "ior<mode>3"
 899   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 900         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 901                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 902   "TARGET_SIMD"
 903   {
 904     switch (which_alternative)
 905       {
 906       case 0:
 907         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 908       case 1:
 909         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 910                                                   AARCH64_CHECK_ORR);
 911       default:
 912         gcc_unreachable ();
 913       }
 914   }
 915   [(set_attr "type" "neon_logic<q>")]
 916 )
 917
 918 (define_insn "xor<mode>3"
 919   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 920         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 921                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 922   "TARGET_SIMD"
 923   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 924   [(set_attr "type" "neon_logic<q>")]
 925 )
 926
 927 (define_insn "one_cmpl<mode>2"
 928   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 929         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 930   "TARGET_SIMD"
 931   "not\t%0.<Vbtype>, %1.<Vbtype>"
 932   [(set_attr "type" "neon_logic<q>")]
 933 )
 934
 935 (define_insn "aarch64_simd_vec_set<mode>"
 936   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 937         (vec_merge:VALL_F16
 938             (vec_duplicate:VALL_F16
 939                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 940             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 941             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 942   "TARGET_SIMD"
 943   {
 944    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 945    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 946    switch (which_alternative)
 947      {
 948      case 0:
 949         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 950      case 1:
 951         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 952      case 2:
 953         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 954      default:
 955         gcc_unreachable ();
 956      }
 957   }
 958   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 959 )
 960
 961 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 962   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 963         (vec_merge:VALL_F16
 964             (vec_duplicate:VALL_F16
 965               (vec_select:<VEL>
 966                 (match_operand:VALL_F16 3 "register_operand" "w")
 967                 (parallel
 968                   [(match_operand:SI 4 "immediate_operand" "i")])))
 969             (match_operand:VALL_F16 1 "register_operand" "0")
 970             (match_operand:SI 2 "immediate_operand" "i")))]
 971   "TARGET_SIMD"
 972   {
 973     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 974     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 975     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 976
 977     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 978   }
 979   [(set_attr "type" "neon_ins<q>")]
 980 )
 981
 982 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 983   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 984         (vec_merge:VALL_F16_NO_V2Q
 985             (vec_duplicate:VALL_F16_NO_V2Q
 986               (vec_select:<VEL>
 987                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 988                 (parallel
 989                   [(match_operand:SI 4 "immediate_operand" "i")])))
 990             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 991             (match_operand:SI 2 "immediate_operand" "i")))]
 992   "TARGET_SIMD"
 993   {
 994     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 995     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 996     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 997                                            INTVAL (operands[4]));
 998
 999     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1000   }
1001   [(set_attr "type" "neon_ins<q>")]
1002 )
1003
1004 (define_expand "signbit<mode>2"
1005   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1006    (use (match_operand:VDQSF 1 "register_operand"))]
1007   "TARGET_SIMD"
1008 {
1009   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1010   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1011                                                         shift_amount);
1012   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1013
1014   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1015                                                  shift_vector));
1016   DONE;
1017 })
1018
1019 (define_insn "aarch64_simd_lshr<mode>"
1020  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1021        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1022                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1023  "TARGET_SIMD"
1024  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1025   [(set_attr "type" "neon_shift_imm<q>")]
1026 )
1027
1028 (define_insn "aarch64_simd_ashr<mode>"
1029  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1030        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1031                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1032  "TARGET_SIMD"
1033  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1034   [(set_attr "type" "neon_shift_imm<q>")]
1035 )
1036
1037 (define_insn "*aarch64_simd_sra<mode>"
1038  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1039         (plus:VDQ_I
1040            (SHIFTRT:VDQ_I
1041                 (match_operand:VDQ_I 1 "register_operand" "w")
1042                 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1043            (match_operand:VDQ_I 3 "register_operand" "0")))]
1044   "TARGET_SIMD"
1045   "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1046   [(set_attr "type" "neon_shift_acc<q>")]
1047 )
1048
1049 (define_insn "aarch64_simd_imm_shl<mode>"
1050  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1051        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1052                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
1053  "TARGET_SIMD"
1054   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1055   [(set_attr "type" "neon_shift_imm<q>")]
1056 )
1057
1058 (define_insn "aarch64_simd_reg_sshl<mode>"
1059  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1060        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1061                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1062  "TARGET_SIMD"
1063  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1064   [(set_attr "type" "neon_shift_reg<q>")]
1065 )
1066
1067 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1068  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1069        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1070                     (match_operand:VDQ_I 2 "register_operand" "w")]
1071                    UNSPEC_ASHIFT_UNSIGNED))]
1072  "TARGET_SIMD"
1073  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1074   [(set_attr "type" "neon_shift_reg<q>")]
1075 )
1076
1077 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1078  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1079        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1080                     (match_operand:VDQ_I 2 "register_operand" "w")]
1081                    UNSPEC_ASHIFT_SIGNED))]
1082  "TARGET_SIMD"
1083  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1084   [(set_attr "type" "neon_shift_reg<q>")]
1085 )
1086
1087 (define_expand "ashl<mode>3"
1088   [(match_operand:VDQ_I 0 "register_operand")
1089    (match_operand:VDQ_I 1 "register_operand")
1090    (match_operand:SI  2 "general_operand")]
1091  "TARGET_SIMD"
1092 {
1093   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1094   int shift_amount;
1095
1096   if (CONST_INT_P (operands[2]))
1097     {
1098       shift_amount = INTVAL (operands[2]);
1099       if (shift_amount >= 0 && shift_amount < bit_width)
1100         {
1101           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1102                                                        shift_amount);
1103           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1104                                                      operands[1],
1105                                                      tmp));
1106           DONE;
1107         }
1108       else
1109         {
1110           operands[2] = force_reg (SImode, operands[2]);
1111         }
1112     }
1113   else if (MEM_P (operands[2]))
1114     {
1115       operands[2] = force_reg (SImode, operands[2]);
1116     }
1117
1118   if (REG_P (operands[2]))
1119     {
1120       rtx tmp = gen_reg_rtx (<MODE>mode);
1121       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1122                                              convert_to_mode (<VEL>mode,
1123                                                               operands[2],
1124                                                               0)));
1125       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1126                                                   tmp));
1127       DONE;
1128     }
1129   else
1130     FAIL;
1131 }
1132 )
1133
1134 (define_expand "lshr<mode>3"
1135   [(match_operand:VDQ_I 0 "register_operand")
1136    (match_operand:VDQ_I 1 "register_operand")
1137    (match_operand:SI  2 "general_operand")]
1138  "TARGET_SIMD"
1139 {
1140   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1141   int shift_amount;
1142
1143   if (CONST_INT_P (operands[2]))
1144     {
1145       shift_amount = INTVAL (operands[2]);
1146       if (shift_amount > 0 && shift_amount <= bit_width)
1147         {
1148           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1149                                                        shift_amount);
1150           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1151                                                   operands[1],
1152                                                   tmp));
1153           DONE;
1154         }
1155       else
1156         operands[2] = force_reg (SImode, operands[2]);
1157     }
1158   else if (MEM_P (operands[2]))
1159     {
1160       operands[2] = force_reg (SImode, operands[2]);
1161     }
1162
1163   if (REG_P (operands[2]))
1164     {
1165       rtx tmp = gen_reg_rtx (SImode);
1166       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1167       emit_insn (gen_negsi2 (tmp, operands[2]));
1168       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1169                                              convert_to_mode (<VEL>mode,
1170                                                               tmp, 0)));
1171       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1172                                                           operands[1],
1173                                                           tmp1));
1174       DONE;
1175     }
1176   else
1177     FAIL;
1178 }
1179 )
1180
1181 (define_expand "ashr<mode>3"
1182   [(match_operand:VDQ_I 0 "register_operand")
1183    (match_operand:VDQ_I 1 "register_operand")
1184    (match_operand:SI  2 "general_operand")]
1185  "TARGET_SIMD"
1186 {
1187   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1188   int shift_amount;
1189
1190   if (CONST_INT_P (operands[2]))
1191     {
1192       shift_amount = INTVAL (operands[2]);
1193       if (shift_amount > 0 && shift_amount <= bit_width)
1194         {
1195           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1196                                                        shift_amount);
1197           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1198                                                   operands[1],
1199                                                   tmp));
1200           DONE;
1201         }
1202       else
1203         operands[2] = force_reg (SImode, operands[2]);
1204     }
1205   else if (MEM_P (operands[2]))
1206     {
1207       operands[2] = force_reg (SImode, operands[2]);
1208     }
1209
1210   if (REG_P (operands[2]))
1211     {
1212       rtx tmp = gen_reg_rtx (SImode);
1213       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1214       emit_insn (gen_negsi2 (tmp, operands[2]));
1215       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1216                                              convert_to_mode (<VEL>mode,
1217                                                               tmp, 0)));
1218       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1219                                                         operands[1],
1220                                                         tmp1));
1221       DONE;
1222     }
1223   else
1224     FAIL;
1225 }
1226 )
1227
1228 (define_expand "vashl<mode>3"
1229  [(match_operand:VDQ_I 0 "register_operand")
1230   (match_operand:VDQ_I 1 "register_operand")
1231   (match_operand:VDQ_I 2 "register_operand")]
1232  "TARGET_SIMD"
1233 {
1234   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1235                                               operands[2]));
1236   DONE;
1237 })
1238
1239 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1240 ;; Negating individual lanes most certainly offsets the
1241 ;; gain from vectorization.
1242 (define_expand "vashr<mode>3"
1243  [(match_operand:VDQ_BHSI 0 "register_operand")
1244   (match_operand:VDQ_BHSI 1 "register_operand")
1245   (match_operand:VDQ_BHSI 2 "register_operand")]
1246  "TARGET_SIMD"
1247 {
1248   rtx neg = gen_reg_rtx (<MODE>mode);
1249   emit (gen_neg<mode>2 (neg, operands[2]));
1250   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1251                                                     neg));
1252   DONE;
1253 })
1254
1255 ;; DI vector shift
1256 (define_expand "aarch64_ashr_simddi"
1257   [(match_operand:DI 0 "register_operand")
1258    (match_operand:DI 1 "register_operand")
1259    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1260   "TARGET_SIMD"
1261   {
1262     /* An arithmetic shift right by 64 fills the result with copies of the sign
1263        bit, just like asr by 63 - however the standard pattern does not handle
1264        a shift by 64.  */
1265     if (INTVAL (operands[2]) == 64)
1266       operands[2] = GEN_INT (63);
1267     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1268     DONE;
1269   }
1270 )
1271
1272 (define_expand "vlshr<mode>3"
1273  [(match_operand:VDQ_BHSI 0 "register_operand")
1274   (match_operand:VDQ_BHSI 1 "register_operand")
1275   (match_operand:VDQ_BHSI 2 "register_operand")]
1276  "TARGET_SIMD"
1277 {
1278   rtx neg = gen_reg_rtx (<MODE>mode);
1279   emit (gen_neg<mode>2 (neg, operands[2]));
1280   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1281                                                       neg));
1282   DONE;
1283 })
1284
1285 (define_expand "aarch64_lshr_simddi"
1286   [(match_operand:DI 0 "register_operand")
1287    (match_operand:DI 1 "register_operand")
1288    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1289   "TARGET_SIMD"
1290   {
1291     if (INTVAL (operands[2]) == 64)
1292       emit_move_insn (operands[0], const0_rtx);
1293     else
1294       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1295     DONE;
1296   }
1297 )
1298
1299 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1300 (define_insn "vec_shr_<mode>"
1301   [(set (match_operand:VD 0 "register_operand" "=w")
1302         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1303                     (match_operand:SI 2 "immediate_operand" "i")]
1304                    UNSPEC_VEC_SHR))]
1305   "TARGET_SIMD"
1306   {
1307     if (BYTES_BIG_ENDIAN)
1308       return "shl %d0, %d1, %2";
1309     else
1310       return "ushr %d0, %d1, %2";
1311   }
1312   [(set_attr "type" "neon_shift_imm")]
1313 )
1314
1315 (define_expand "vec_set<mode>"
1316   [(match_operand:VALL_F16 0 "register_operand")
1317    (match_operand:<VEL> 1 "register_operand")
1318    (match_operand:SI 2 "immediate_operand")]
1319   "TARGET_SIMD"
1320   {
1321     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1322     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1323                                           GEN_INT (elem), operands[0]));
1324     DONE;
1325   }
1326 )
1327
1328
1329 (define_insn "aarch64_mla<mode>"
1330  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1331        (plus:VDQ_BHSI (mult:VDQ_BHSI
1332                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1333                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1334                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1335  "TARGET_SIMD"
1336  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1337   [(set_attr "type" "neon_mla_<Vetype><q>")]
1338 )
1339
1340 (define_insn "*aarch64_mla_elt<mode>"
1341  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1342        (plus:VDQHS
1343          (mult:VDQHS
1344            (vec_duplicate:VDQHS
1345               (vec_select:<VEL>
1346                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1347                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1348            (match_operand:VDQHS 3 "register_operand" "w"))
1349          (match_operand:VDQHS 4 "register_operand" "0")))]
1350  "TARGET_SIMD"
1351   {
1352     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1353     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1354   }
1355   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1356 )
1357
1358 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1359  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1360        (plus:VDQHS
1361          (mult:VDQHS
1362            (vec_duplicate:VDQHS
1363               (vec_select:<VEL>
1364                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1365                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1366            (match_operand:VDQHS 3 "register_operand" "w"))
1367          (match_operand:VDQHS 4 "register_operand" "0")))]
1368  "TARGET_SIMD"
1369   {
1370     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1371     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1372   }
1373   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1374 )
1375
1376 (define_insn "*aarch64_mla_elt_merge<mode>"
1377   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1378         (plus:VDQHS
1379           (mult:VDQHS (vec_duplicate:VDQHS
1380                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1381                 (match_operand:VDQHS 2 "register_operand" "w"))
1382           (match_operand:VDQHS 3 "register_operand" "0")))]
1383  "TARGET_SIMD"
1384  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1385   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1386 )
1387
1388 (define_insn "aarch64_mls<mode>"
1389  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1390        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1391                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1392                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1393  "TARGET_SIMD"
1394  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1395   [(set_attr "type" "neon_mla_<Vetype><q>")]
1396 )
1397
1398 (define_insn "*aarch64_mls_elt<mode>"
1399  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1400        (minus:VDQHS
1401          (match_operand:VDQHS 4 "register_operand" "0")
1402          (mult:VDQHS
1403            (vec_duplicate:VDQHS
1404               (vec_select:<VEL>
1405                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1406                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1407            (match_operand:VDQHS 3 "register_operand" "w"))))]
1408  "TARGET_SIMD"
1409   {
1410     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1411     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1412   }
1413   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1414 )
1415
1416 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1417  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1418        (minus:VDQHS
1419          (match_operand:VDQHS 4 "register_operand" "0")
1420          (mult:VDQHS
1421            (vec_duplicate:VDQHS
1422               (vec_select:<VEL>
1423                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1424                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1425            (match_operand:VDQHS 3 "register_operand" "w"))))]
1426  "TARGET_SIMD"
1427   {
1428     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1429     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1430   }
1431   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1432 )
1433
1434 (define_insn "*aarch64_mls_elt_merge<mode>"
1435   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1436         (minus:VDQHS
1437           (match_operand:VDQHS 1 "register_operand" "0")
1438           (mult:VDQHS (vec_duplicate:VDQHS
1439                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1440                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1441   "TARGET_SIMD"
1442   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1443   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1444 )
1445
1446 ;; Max/Min operations.
1447 (define_insn "<su><maxmin><mode>3"
1448  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1449        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1450                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1451  "TARGET_SIMD"
1452  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1453   [(set_attr "type" "neon_minmax<q>")]
1454 )
1455
1456 (define_expand "<su><maxmin>v2di3"
1457  [(set (match_operand:V2DI 0 "register_operand")
1458        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1459                     (match_operand:V2DI 2 "register_operand")))]
1460  "TARGET_SIMD"
1461 {
1462   enum rtx_code cmp_operator;
1463   rtx cmp_fmt;
1464
1465   switch (<CODE>)
1466     {
1467     case UMIN:
1468       cmp_operator = LTU;
1469       break;
1470     case SMIN:
1471       cmp_operator = LT;
1472       break;
1473     case UMAX:
1474       cmp_operator = GTU;
1475       break;
1476     case SMAX:
1477       cmp_operator = GT;
1478       break;
1479     default:
1480       gcc_unreachable ();
1481     }
1482
1483   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1484   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1485               operands[2], cmp_fmt, operands[1], operands[2]));
1486   DONE;
1487 })
1488
1489 ;; Pairwise Integer Max/Min operations.
1490 (define_insn "aarch64_<maxmin_uns>p<mode>"
1491  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1492        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1493                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1494                         MAXMINV))]
1495  "TARGET_SIMD"
1496  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1497   [(set_attr "type" "neon_minmax<q>")]
1498 )
1499
1500 ;; Pairwise FP Max/Min operations.
1501 (define_insn "aarch64_<maxmin_uns>p<mode>"
1502  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1503        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1504                       (match_operand:VHSDF 2 "register_operand" "w")]
1505                       FMAXMINV))]
1506  "TARGET_SIMD"
1507  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1508   [(set_attr "type" "neon_minmax<q>")]
1509 )
1510
1511 ;; vec_concat gives a new vector with the low elements from operand 1, and
1512 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1513 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1514 ;; What that means, is that the RTL descriptions of the below patterns
1515 ;; need to change depending on endianness.
1516
1517 ;; Move to the low architectural bits of the register.
1518 ;; On little-endian this is { operand, zeroes }
1519 ;; On big-endian this is { zeroes, operand }
1520
1521 (define_insn "move_lo_quad_internal_<mode>"
1522   [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1523         (vec_concat:VQMOV_NO2E
1524           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1525           (vec_duplicate:<VHALF> (const_int 0))))]
1526   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1527   "@
1528    dup\\t%d0, %1.d[0]
1529    fmov\\t%d0, %1
1530    dup\\t%d0, %1"
1531   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1532    (set_attr "length" "4")
1533    (set_attr "arch" "simd,fp,simd")]
1534 )
1535
1536 (define_insn "move_lo_quad_internal_<mode>"
1537   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1538         (vec_concat:VQ_2E
1539           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1540           (const_int 0)))]
1541   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1542   "@
1543    dup\\t%d0, %1.d[0]
1544    fmov\\t%d0, %1
1545    dup\\t%d0, %1"
1546   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1547    (set_attr "length" "4")
1548    (set_attr "arch" "simd,fp,simd")]
1549 )
1550
1551 (define_insn "move_lo_quad_internal_be_<mode>"
1552   [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1553         (vec_concat:VQMOV_NO2E
1554           (vec_duplicate:<VHALF> (const_int 0))
1555           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1556   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1557   "@
1558    dup\\t%d0, %1.d[0]
1559    fmov\\t%d0, %1
1560    dup\\t%d0, %1"
1561   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1562    (set_attr "length" "4")
1563    (set_attr "arch" "simd,fp,simd")]
1564 )
1565
1566 (define_insn "move_lo_quad_internal_be_<mode>"
1567   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1568         (vec_concat:VQ_2E
1569           (const_int 0)
1570           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1571   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1572   "@
1573    dup\\t%d0, %1.d[0]
1574    fmov\\t%d0, %1
1575    dup\\t%d0, %1"
1576   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1577    (set_attr "length" "4")
1578    (set_attr "arch" "simd,fp,simd")]
1579 )
1580
1581 (define_expand "move_lo_quad_<mode>"
1582   [(match_operand:VQMOV 0 "register_operand")
1583    (match_operand:VQMOV 1 "register_operand")]
1584   "TARGET_SIMD"
1585 {
1586   if (BYTES_BIG_ENDIAN)
1587     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1588   else
1589     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1590   DONE;
1591 }
1592 )
1593
1594 ;; Move operand1 to the high architectural bits of the register, keeping
1595 ;; the low architectural bits of operand2.
1596 ;; For little-endian this is { operand2, operand1 }
1597 ;; For big-endian this is { operand1, operand2 }
1598
1599 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1600   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1601         (vec_concat:VQMOV
1602           (vec_select:<VHALF>
1603                 (match_dup 0)
1604                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1605           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1606   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1607   "@
1608    ins\\t%0.d[1], %1.d[0]
1609    ins\\t%0.d[1], %1"
1610   [(set_attr "type" "neon_ins")]
1611 )
1612
1613 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1614   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1615         (vec_concat:VQMOV
1616           (match_operand:<VHALF> 1 "register_operand" "w,r")
1617           (vec_select:<VHALF>
1618                 (match_dup 0)
1619                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1620   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1621   "@
1622    ins\\t%0.d[1], %1.d[0]
1623    ins\\t%0.d[1], %1"
1624   [(set_attr "type" "neon_ins")]
1625 )
1626
1627 (define_expand "move_hi_quad_<mode>"
1628  [(match_operand:VQMOV 0 "register_operand")
1629   (match_operand:<VHALF> 1 "register_operand")]
1630  "TARGET_SIMD"
1631 {
1632   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1633   if (BYTES_BIG_ENDIAN)
1634     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1635                     operands[1], p));
1636   else
1637     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1638                     operands[1], p));
1639   DONE;
1640 })
1641
1642 ;; Narrowing operations.
1643
1644 ;; For doubles.
1645 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1646  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1647        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1648  "TARGET_SIMD"
1649  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1650   [(set_attr "type" "neon_shift_imm_narrow_q")]
1651 )
1652
1653 (define_expand "vec_pack_trunc_<mode>"
1654  [(match_operand:<VNARROWD> 0 "register_operand")
1655   (match_operand:VDN 1 "register_operand")
1656   (match_operand:VDN 2 "register_operand")]
1657  "TARGET_SIMD"
1658 {
1659   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1660   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1661   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1662
1663   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1664   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1665   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1666   DONE;
1667 })
1668
1669 ;; For quads.
1670
1671 (define_insn "vec_pack_trunc_<mode>"
1672  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1673        (vec_concat:<VNARROWQ2>
1674          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1675          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1676  "TARGET_SIMD"
1677  {
1678    if (BYTES_BIG_ENDIAN)
1679      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1680    else
1681      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1682  }
1683   [(set_attr "type" "multiple")
1684    (set_attr "length" "8")]
1685 )
1686
1687 ;; Widening operations.
1688
1689 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1690   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1691         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1692                                (match_operand:VQW 1 "register_operand" "w")
1693                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1694                             )))]
1695   "TARGET_SIMD"
1696   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1697   [(set_attr "type" "neon_shift_imm_long")]
1698 )
1699
1700 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1701   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1702         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1703                                (match_operand:VQW 1 "register_operand" "w")
1704                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1705                             )))]
1706   "TARGET_SIMD"
1707   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1708   [(set_attr "type" "neon_shift_imm_long")]
1709 )
1710
1711 (define_expand "vec_unpack<su>_hi_<mode>"
1712   [(match_operand:<VWIDE> 0 "register_operand")
1713    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1714   "TARGET_SIMD"
1715   {
1716     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1717     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1718                                                           operands[1], p));
1719     DONE;
1720   }
1721 )
1722
1723 (define_expand "vec_unpack<su>_lo_<mode>"
1724   [(match_operand:<VWIDE> 0 "register_operand")
1725    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1726   "TARGET_SIMD"
1727   {
1728     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1729     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1730                                                           operands[1], p));
1731     DONE;
1732   }
1733 )
1734
1735 ;; Widening arithmetic.
1736
1737 (define_insn "*aarch64_<su>mlal_lo<mode>"
1738   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1739         (plus:<VWIDE>
1740           (mult:<VWIDE>
1741               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1742                  (match_operand:VQW 2 "register_operand" "w")
1743                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1744               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1745                  (match_operand:VQW 4 "register_operand" "w")
1746                  (match_dup 3))))
1747           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1748   "TARGET_SIMD"
1749   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1750   [(set_attr "type" "neon_mla_<Vetype>_long")]
1751 )
1752
1753 (define_insn "*aarch64_<su>mlal_hi<mode>"
1754   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1755         (plus:<VWIDE>
1756           (mult:<VWIDE>
1757               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1758                  (match_operand:VQW 2 "register_operand" "w")
1759                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1760               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1761                  (match_operand:VQW 4 "register_operand" "w")
1762                  (match_dup 3))))
1763           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1764   "TARGET_SIMD"
1765   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1766   [(set_attr "type" "neon_mla_<Vetype>_long")]
1767 )
1768
1769 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1770   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771         (minus:<VWIDE>
1772           (match_operand:<VWIDE> 1 "register_operand" "0")
1773           (mult:<VWIDE>
1774               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775                  (match_operand:VQW 2 "register_operand" "w")
1776                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1777               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1778                  (match_operand:VQW 4 "register_operand" "w")
1779                  (match_dup 3))))))]
1780   "TARGET_SIMD"
1781   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1782   [(set_attr "type" "neon_mla_<Vetype>_long")]
1783 )
1784
1785 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1786   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1787         (minus:<VWIDE>
1788           (match_operand:<VWIDE> 1 "register_operand" "0")
1789           (mult:<VWIDE>
1790               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1791                  (match_operand:VQW 2 "register_operand" "w")
1792                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1793               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1794                  (match_operand:VQW 4 "register_operand" "w")
1795                  (match_dup 3))))))]
1796   "TARGET_SIMD"
1797   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1798   [(set_attr "type" "neon_mla_<Vetype>_long")]
1799 )
1800
1801 (define_insn "*aarch64_<su>mlal<mode>"
1802   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1803         (plus:<VWIDE>
1804           (mult:<VWIDE>
1805             (ANY_EXTEND:<VWIDE>
1806               (match_operand:VD_BHSI 1 "register_operand" "w"))
1807             (ANY_EXTEND:<VWIDE>
1808               (match_operand:VD_BHSI 2 "register_operand" "w")))
1809           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1810   "TARGET_SIMD"
1811   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1812   [(set_attr "type" "neon_mla_<Vetype>_long")]
1813 )
1814
1815 (define_insn "*aarch64_<su>mlsl<mode>"
1816   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1817         (minus:<VWIDE>
1818           (match_operand:<VWIDE> 1 "register_operand" "0")
1819           (mult:<VWIDE>
1820             (ANY_EXTEND:<VWIDE>
1821               (match_operand:VD_BHSI 2 "register_operand" "w"))
1822             (ANY_EXTEND:<VWIDE>
1823               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1824   "TARGET_SIMD"
1825   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1826   [(set_attr "type" "neon_mla_<Vetype>_long")]
1827 )
1828
1829 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1830  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1831        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1832                            (match_operand:VQW 1 "register_operand" "w")
1833                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1834                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1835                            (match_operand:VQW 2 "register_operand" "w")
1836                            (match_dup 3)))))]
1837   "TARGET_SIMD"
1838   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1839   [(set_attr "type" "neon_mul_<Vetype>_long")]
1840 )
1841
1842 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
1843   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1844         (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
1845                          (match_operand:VD_BHSI 1 "register_operand" "w"))
1846                       (ANY_EXTEND:<VWIDE>
1847                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
1848   "TARGET_SIMD"
1849   "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1850   [(set_attr "type" "neon_mul_<Vetype>_long")]
1851 )
1852
1853 (define_expand "vec_widen_<su>mult_lo_<mode>"
1854   [(match_operand:<VWIDE> 0 "register_operand")
1855    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1856    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1857  "TARGET_SIMD"
1858  {
1859    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1860    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1861                                                        operands[1],
1862                                                        operands[2], p));
1863    DONE;
1864  }
1865 )
1866
1867 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1868  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1869       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1870                             (match_operand:VQW 1 "register_operand" "w")
1871                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1872                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1873                             (match_operand:VQW 2 "register_operand" "w")
1874                             (match_dup 3)))))]
1875   "TARGET_SIMD"
1876   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1877   [(set_attr "type" "neon_mul_<Vetype>_long")]
1878 )
1879
1880 (define_expand "vec_widen_<su>mult_hi_<mode>"
1881   [(match_operand:<VWIDE> 0 "register_operand")
1882    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1883    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1884  "TARGET_SIMD"
1885  {
1886    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1887    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1888                                                        operands[1],
1889                                                        operands[2], p));
1890    DONE;
1891
1892  }
1893 )
1894
1895 ;; vmull_lane_s16 intrinsics
1896 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
1897   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1898         (mult:<VWIDE>
1899           (ANY_EXTEND:<VWIDE>
1900             (match_operand:<VCOND> 1 "register_operand" "w"))
1901           (ANY_EXTEND:<VWIDE>
1902             (vec_duplicate:<VCOND>
1903               (vec_select:<VEL>
1904                 (match_operand:VDQHS 2 "register_operand" "<vwx>")
1905                 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
1906   "TARGET_SIMD"
1907   {
1908     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
1909     return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
1910   }
1911   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
1912 )
1913
1914 ;; vmlal_lane_s16 intrinsics
1915 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
1916   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1917         (plus:<VWIDE>
1918           (mult:<VWIDE>
1919             (ANY_EXTEND:<VWIDE>
1920               (match_operand:<VCOND> 2 "register_operand" "w"))
1921             (ANY_EXTEND:<VWIDE>
1922               (vec_duplicate:<VCOND>
1923                 (vec_select:<VEL>
1924                   (match_operand:VDQHS 3 "register_operand" "<vwx>")
1925                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
1926           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1927   "TARGET_SIMD"
1928   {
1929     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1930     return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
1931   }
1932   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
1933 )
1934
1935 ;; FP vector operations.
1936 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1937 ;; double-precision (64-bit) floating-point data types and arithmetic as
1938 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1939 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1940 ;;
1941 ;; Floating-point operations can raise an exception.  Vectorizing such
1942 ;; operations are safe because of reasons explained below.
1943 ;;
1944 ;; ARMv8 permits an extension to enable trapped floating-point
1945 ;; exception handling, however this is an optional feature.  In the
1946 ;; event of a floating-point exception being raised by vectorised
1947 ;; code then:
1948 ;; 1.  If trapped floating-point exceptions are available, then a trap
1949 ;;     will be taken when any lane raises an enabled exception.  A trap
1950 ;;     handler may determine which lane raised the exception.
1951 ;; 2.  Alternatively a sticky exception flag is set in the
1952 ;;     floating-point status register (FPSR).  Software may explicitly
1953 ;;     test the exception flags, in which case the tests will either
1954 ;;     prevent vectorisation, allowing precise identification of the
1955 ;;     failing operation, or if tested outside of vectorisable regions
1956 ;;     then the specific operation and lane are not of interest.
1957
1958 ;; FP arithmetic operations.
1959
1960 (define_insn "add<mode>3"
1961  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1962        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1963                    (match_operand:VHSDF 2 "register_operand" "w")))]
1964  "TARGET_SIMD"
1965  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1966   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1967 )
1968
1969 (define_insn "sub<mode>3"
1970  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1971        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1972                     (match_operand:VHSDF 2 "register_operand" "w")))]
1973  "TARGET_SIMD"
1974  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1975   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1976 )
1977
1978 (define_insn "mul<mode>3"
1979  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1980        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1981                    (match_operand:VHSDF 2 "register_operand" "w")))]
1982  "TARGET_SIMD"
1983  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1984   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1985 )
1986
1987 (define_expand "div<mode>3"
1988  [(set (match_operand:VHSDF 0 "register_operand")
1989        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1990                   (match_operand:VHSDF 2 "register_operand")))]
1991  "TARGET_SIMD"
1992 {
1993   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1994     DONE;
1995
1996   operands[1] = force_reg (<MODE>mode, operands[1]);
1997 })
1998
1999 (define_insn "*div<mode>3"
2000  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2001        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2002                  (match_operand:VHSDF 2 "register_operand" "w")))]
2003  "TARGET_SIMD"
2004  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2005   [(set_attr "type" "neon_fp_div_<stype><q>")]
2006 )
2007
2008 (define_insn "neg<mode>2"
2009  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2010        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2011  "TARGET_SIMD"
2012  "fneg\\t%0.<Vtype>, %1.<Vtype>"
2013   [(set_attr "type" "neon_fp_neg_<stype><q>")]
2014 )
2015
2016 (define_insn "abs<mode>2"
2017  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2018        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2019  "TARGET_SIMD"
2020  "fabs\\t%0.<Vtype>, %1.<Vtype>"
2021   [(set_attr "type" "neon_fp_abs_<stype><q>")]
2022 )
2023
2024 (define_insn "fma<mode>4"
2025   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2026        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2027                   (match_operand:VHSDF 2 "register_operand" "w")
2028                   (match_operand:VHSDF 3 "register_operand" "0")))]
2029   "TARGET_SIMD"
2030  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2031   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2032 )
2033
2034 (define_insn "*aarch64_fma4_elt<mode>"
2035   [(set (match_operand:VDQF 0 "register_operand" "=w")
2036     (fma:VDQF
2037       (vec_duplicate:VDQF
2038         (vec_select:<VEL>
2039           (match_operand:VDQF 1 "register_operand" "<h_con>")
2040           (parallel [(match_operand:SI 2 "immediate_operand")])))
2041       (match_operand:VDQF 3 "register_operand" "w")
2042       (match_operand:VDQF 4 "register_operand" "0")))]
2043   "TARGET_SIMD"
2044   {
2045     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2046     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2047   }
2048   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2049 )
2050
2051 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2052   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2053     (fma:VDQSF
2054       (vec_duplicate:VDQSF
2055         (vec_select:<VEL>
2056           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2057           (parallel [(match_operand:SI 2 "immediate_operand")])))
2058       (match_operand:VDQSF 3 "register_operand" "w")
2059       (match_operand:VDQSF 4 "register_operand" "0")))]
2060   "TARGET_SIMD"
2061   {
2062     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2063     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2064   }
2065   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2066 )
2067
2068 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
2069   [(set (match_operand:VMUL 0 "register_operand" "=w")
2070     (fma:VMUL
2071       (vec_duplicate:VMUL
2072           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2073       (match_operand:VMUL 2 "register_operand" "w")
2074       (match_operand:VMUL 3 "register_operand" "0")))]
2075   "TARGET_SIMD"
2076   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2077   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2078 )
2079
2080 (define_insn "*aarch64_fma4_elt_to_64v2df"
2081   [(set (match_operand:DF 0 "register_operand" "=w")
2082     (fma:DF
2083         (vec_select:DF
2084           (match_operand:V2DF 1 "register_operand" "w")
2085           (parallel [(match_operand:SI 2 "immediate_operand")]))
2086       (match_operand:DF 3 "register_operand" "w")
2087       (match_operand:DF 4 "register_operand" "0")))]
2088   "TARGET_SIMD"
2089   {
2090     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2091     return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2092   }
2093   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2094 )
2095
2096 (define_insn "fnma<mode>4"
2097   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2098         (fma:VHSDF
2099           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2100           (match_operand:VHSDF 2 "register_operand" "w")
2101           (match_operand:VHSDF 3 "register_operand" "0")))]
2102   "TARGET_SIMD"
2103   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2104   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2105 )
2106
2107 (define_insn "*aarch64_fnma4_elt<mode>"
2108   [(set (match_operand:VDQF 0 "register_operand" "=w")
2109     (fma:VDQF
2110       (neg:VDQF
2111         (match_operand:VDQF 3 "register_operand" "w"))
2112       (vec_duplicate:VDQF
2113         (vec_select:<VEL>
2114           (match_operand:VDQF 1 "register_operand" "<h_con>")
2115           (parallel [(match_operand:SI 2 "immediate_operand")])))
2116       (match_operand:VDQF 4 "register_operand" "0")))]
2117   "TARGET_SIMD"
2118   {
2119     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2120     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2121   }
2122   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2123 )
2124
2125 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2126   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2127     (fma:VDQSF
2128       (neg:VDQSF
2129         (match_operand:VDQSF 3 "register_operand" "w"))
2130       (vec_duplicate:VDQSF
2131         (vec_select:<VEL>
2132           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2133           (parallel [(match_operand:SI 2 "immediate_operand")])))
2134       (match_operand:VDQSF 4 "register_operand" "0")))]
2135   "TARGET_SIMD"
2136   {
2137     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2138     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2139   }
2140   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2141 )
2142
2143 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2144   [(set (match_operand:VMUL 0 "register_operand" "=w")
2145     (fma:VMUL
2146       (neg:VMUL
2147         (match_operand:VMUL 2 "register_operand" "w"))
2148       (vec_duplicate:VMUL
2149         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2150       (match_operand:VMUL 3 "register_operand" "0")))]
2151   "TARGET_SIMD"
2152   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2153   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2154 )
2155
2156 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2157   [(set (match_operand:DF 0 "register_operand" "=w")
2158     (fma:DF
2159       (vec_select:DF
2160         (match_operand:V2DF 1 "register_operand" "w")
2161         (parallel [(match_operand:SI 2 "immediate_operand")]))
2162       (neg:DF
2163         (match_operand:DF 3 "register_operand" "w"))
2164       (match_operand:DF 4 "register_operand" "0")))]
2165   "TARGET_SIMD"
2166   {
2167     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2168     return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2169   }
2170   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2171 )
2172
2173 ;; Vector versions of the floating-point frint patterns.
2174 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2175 (define_insn "<frint_pattern><mode>2"
2176   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2177         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2178                        FRINT))]
2179   "TARGET_SIMD"
2180   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2181   [(set_attr "type" "neon_fp_round_<stype><q>")]
2182 )
2183
2184 ;; Vector versions of the fcvt standard patterns.
2185 ;; Expands to lbtrunc, lround, lceil, lfloor
2186 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2187   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2188         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2189                                [(match_operand:VHSDF 1 "register_operand" "w")]
2190                                FCVT)))]
2191   "TARGET_SIMD"
2192   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2193   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2194 )
2195
2196 ;; HF Scalar variants of related SIMD instructions.
2197 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2198   [(set (match_operand:HI 0 "register_operand" "=w")
2199         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2200                       FCVT)))]
2201   "TARGET_SIMD_F16INST"
2202   "fcvt<frint_suffix><su>\t%h0, %h1"
2203   [(set_attr "type" "neon_fp_to_int_s")]
2204 )
2205
2206 (define_insn "<optab>_trunchfhi2"
2207   [(set (match_operand:HI 0 "register_operand" "=w")
2208         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2209   "TARGET_SIMD_F16INST"
2210   "fcvtz<su>\t%h0, %h1"
2211   [(set_attr "type" "neon_fp_to_int_s")]
2212 )
2213
2214 (define_insn "<optab>hihf2"
2215   [(set (match_operand:HF 0 "register_operand" "=w")
2216         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2217   "TARGET_SIMD_F16INST"
2218   "<su_optab>cvtf\t%h0, %h1"
2219   [(set_attr "type" "neon_int_to_fp_s")]
2220 )
2221
2222 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2223   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2224         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2225                                [(mult:VDQF
2226          (match_operand:VDQF 1 "register_operand" "w")
2227          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2228                                UNSPEC_FRINTZ)))]
2229   "TARGET_SIMD
2230    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2231                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2232   {
2233     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2234     char buf[64];
2235     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2236     output_asm_insn (buf, operands);
2237     return "";
2238   }
2239   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2240 )
2241
2242 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2243   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2244         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2245                                [(match_operand:VHSDF 1 "register_operand")]
2246                                 UNSPEC_FRINTZ)))]
2247   "TARGET_SIMD"
2248   {})
2249
2250 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2251   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2252         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2253                                [(match_operand:VHSDF 1 "register_operand")]
2254                                 UNSPEC_FRINTZ)))]
2255   "TARGET_SIMD"
2256   {})
2257
2258 (define_expand "ftrunc<VHSDF:mode>2"
2259   [(set (match_operand:VHSDF 0 "register_operand")
2260         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2261                        UNSPEC_FRINTZ))]
2262   "TARGET_SIMD"
2263   {})
2264
2265 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2266   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2267         (FLOATUORS:VHSDF
2268           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2269   "TARGET_SIMD"
2270   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2271   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2272 )
2273
2274 ;; Conversions between vectors of floats and doubles.
2275 ;; Contains a mix of patterns to match standard pattern names
2276 ;; and those for intrinsics.
2277
2278 ;; Float widening operations.
2279
2280 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2281   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2282         (float_extend:<VWIDE> (vec_select:<VHALF>
2283                                (match_operand:VQ_HSF 1 "register_operand" "w")
2284                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2285                             )))]
2286   "TARGET_SIMD"
2287   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2288   [(set_attr "type" "neon_fp_cvt_widen_s")]
2289 )
2290
2291 ;; Convert between fixed-point and floating-point (vector modes)
2292
2293 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2294   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2295         (unspec:<VHSDF:FCVT_TARGET>
2296           [(match_operand:VHSDF 1 "register_operand" "w")
2297            (match_operand:SI 2 "immediate_operand" "i")]
2298          FCVT_F2FIXED))]
2299   "TARGET_SIMD"
2300   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2301   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2302 )
2303
2304 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2305   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2306         (unspec:<VDQ_HSDI:FCVT_TARGET>
2307           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2308            (match_operand:SI 2 "immediate_operand" "i")]
2309          FCVT_FIXED2F))]
2310   "TARGET_SIMD"
2311   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2312   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2313 )
2314
2315 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2316 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2317 ;; the meaning of HI and LO changes depending on the target endianness.
2318 ;; While elsewhere we map the higher numbered elements of a vector to
2319 ;; the lower architectural lanes of the vector, for these patterns we want
2320 ;; to always treat "hi" as referring to the higher architectural lanes.
2321 ;; Consequently, while the patterns below look inconsistent with our
2322 ;; other big-endian patterns their behavior is as required.
2323
2324 (define_expand "vec_unpacks_lo_<mode>"
2325   [(match_operand:<VWIDE> 0 "register_operand")
2326    (match_operand:VQ_HSF 1 "register_operand")]
2327   "TARGET_SIMD"
2328   {
2329     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2330     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2331                                                        operands[1], p));
2332     DONE;
2333   }
2334 )
2335
2336 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2337   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2338         (float_extend:<VWIDE> (vec_select:<VHALF>
2339                                (match_operand:VQ_HSF 1 "register_operand" "w")
2340                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2341                             )))]
2342   "TARGET_SIMD"
2343   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2344   [(set_attr "type" "neon_fp_cvt_widen_s")]
2345 )
2346
2347 (define_expand "vec_unpacks_hi_<mode>"
2348   [(match_operand:<VWIDE> 0 "register_operand")
2349    (match_operand:VQ_HSF 1 "register_operand")]
2350   "TARGET_SIMD"
2351   {
2352     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2353     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2354                                                        operands[1], p));
2355     DONE;
2356   }
2357 )
2358 (define_insn "aarch64_float_extend_lo_<Vwide>"
2359   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2360         (float_extend:<VWIDE>
2361           (match_operand:VDF 1 "register_operand" "w")))]
2362   "TARGET_SIMD"
2363   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2364   [(set_attr "type" "neon_fp_cvt_widen_s")]
2365 )
2366
2367 ;; Float narrowing operations.
2368
2369 (define_insn "aarch64_float_truncate_lo_<mode>"
2370   [(set (match_operand:VDF 0 "register_operand" "=w")
2371       (float_truncate:VDF
2372         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2373   "TARGET_SIMD"
2374   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2375   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2376 )
2377
2378 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2379   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2380     (vec_concat:<VDBL>
2381       (match_operand:VDF 1 "register_operand" "0")
2382       (float_truncate:VDF
2383         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2384   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2385   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2386   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2387 )
2388
2389 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2390   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2391     (vec_concat:<VDBL>
2392       (float_truncate:VDF
2393         (match_operand:<VWIDE> 2 "register_operand" "w"))
2394       (match_operand:VDF 1 "register_operand" "0")))]
2395   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2396   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2397   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2398 )
2399
2400 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2401   [(match_operand:<VDBL> 0 "register_operand")
2402    (match_operand:VDF 1 "register_operand")
2403    (match_operand:<VWIDE> 2 "register_operand")]
2404   "TARGET_SIMD"
2405 {
2406   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2407                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2408                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2409   emit_insn (gen (operands[0], operands[1], operands[2]));
2410   DONE;
2411 }
2412 )
2413
2414 (define_expand "vec_pack_trunc_v2df"
2415   [(set (match_operand:V4SF 0 "register_operand")
2416       (vec_concat:V4SF
2417         (float_truncate:V2SF
2418             (match_operand:V2DF 1 "register_operand"))
2419         (float_truncate:V2SF
2420             (match_operand:V2DF 2 "register_operand"))
2421           ))]
2422   "TARGET_SIMD"
2423   {
2424     rtx tmp = gen_reg_rtx (V2SFmode);
2425     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2426     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2427
2428     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2429     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2430                                                    tmp, operands[hi]));
2431     DONE;
2432   }
2433 )
2434
2435 (define_expand "vec_pack_trunc_df"
2436   [(set (match_operand:V2SF 0 "register_operand")
2437       (vec_concat:V2SF
2438         (float_truncate:SF
2439             (match_operand:DF 1 "register_operand"))
2440         (float_truncate:SF
2441             (match_operand:DF 2 "register_operand"))
2442           ))]
2443   "TARGET_SIMD"
2444   {
2445     rtx tmp = gen_reg_rtx (V2SFmode);
2446     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2447     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2448
2449     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2450     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2451     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2452     DONE;
2453   }
2454 )
2455
2456 ;; FP Max/Min
2457 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2458 ;; expression like:
2459 ;;      a = (b < c) ? b : c;
2460 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2461 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2462 ;; -ffast-math.
2463 ;;
2464 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2465 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2466 ;; operand will be returned when both operands are zero (i.e. they may not
2467 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2468 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2469 ;; NaNs.
2470
2471 (define_insn "<su><maxmin><mode>3"
2472   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2473         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2474                        (match_operand:VHSDF 2 "register_operand" "w")))]
2475   "TARGET_SIMD"
2476   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2477   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2478 )
2479
2480 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2481 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2482 ;; which implement the IEEE fmax ()/fmin () functions.
2483 (define_insn "<maxmin_uns><mode>3"
2484   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2485        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2486                       (match_operand:VHSDF 2 "register_operand" "w")]
2487                       FMAXMIN_UNS))]
2488   "TARGET_SIMD"
2489   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2490   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2491 )
2492
2493 ;; 'across lanes' add.
2494
2495 (define_expand "reduc_plus_scal_<mode>"
2496   [(match_operand:<VEL> 0 "register_operand")
2497    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2498                UNSPEC_ADDV)]
2499   "TARGET_SIMD"
2500   {
2501     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2502     rtx scratch = gen_reg_rtx (<MODE>mode);
2503     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2504     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2505     DONE;
2506   }
2507 )
2508
2509 (define_insn "aarch64_faddp<mode>"
2510  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2511        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2512                       (match_operand:VHSDF 2 "register_operand" "w")]
2513         UNSPEC_FADDV))]
2514  "TARGET_SIMD"
2515  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2516   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2517 )
2518
2519 (define_insn "aarch64_reduc_plus_internal<mode>"
2520  [(set (match_operand:VDQV 0 "register_operand" "=w")
2521        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2522                     UNSPEC_ADDV))]
2523  "TARGET_SIMD"
2524  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2525   [(set_attr "type" "neon_reduc_add<q>")]
2526 )
2527
2528 ;; ADDV with result zero-extended to SI/DImode (for popcount).
2529 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
2530  [(set (match_operand:GPI 0 "register_operand" "=w")
2531        (zero_extend:GPI
2532         (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
2533                              UNSPEC_ADDV)))]
2534  "TARGET_SIMD"
2535  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
2536   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
2537 )
2538
2539 (define_insn "aarch64_reduc_plus_internalv2si"
2540  [(set (match_operand:V2SI 0 "register_operand" "=w")
2541        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2542                     UNSPEC_ADDV))]
2543  "TARGET_SIMD"
2544  "addp\\t%0.2s, %1.2s, %1.2s"
2545   [(set_attr "type" "neon_reduc_add")]
2546 )
2547
2548 (define_insn "reduc_plus_scal_<mode>"
2549  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2550        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2551                    UNSPEC_FADDV))]
2552  "TARGET_SIMD"
2553  "faddp\\t%<Vetype>0, %1.<Vtype>"
2554   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2555 )
2556
2557 (define_expand "reduc_plus_scal_v4sf"
2558  [(set (match_operand:SF 0 "register_operand")
2559        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2560                     UNSPEC_FADDV))]
2561  "TARGET_SIMD"
2562 {
2563   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2564   rtx scratch = gen_reg_rtx (V4SFmode);
2565   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2566   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2567   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2568   DONE;
2569 })
2570
2571 (define_insn "clrsb<mode>2"
2572   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2573         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2574   "TARGET_SIMD"
2575   "cls\\t%0.<Vtype>, %1.<Vtype>"
2576   [(set_attr "type" "neon_cls<q>")]
2577 )
2578
2579 (define_insn "clz<mode>2"
2580  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2581        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2582  "TARGET_SIMD"
2583  "clz\\t%0.<Vtype>, %1.<Vtype>"
2584   [(set_attr "type" "neon_cls<q>")]
2585 )
2586
2587 (define_insn "popcount<mode>2"
2588   [(set (match_operand:VB 0 "register_operand" "=w")
2589         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2590   "TARGET_SIMD"
2591   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2592   [(set_attr "type" "neon_cnt<q>")]
2593 )
2594
2595 ;; 'across lanes' max and min ops.
2596
2597 ;; Template for outputting a scalar, so we can create __builtins which can be
2598 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2599 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2600   [(match_operand:<VEL> 0 "register_operand")
2601    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2602                   FMAXMINV)]
2603   "TARGET_SIMD"
2604   {
2605     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2606     rtx scratch = gen_reg_rtx (<MODE>mode);
2607     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2608                                                               operands[1]));
2609     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2610     DONE;
2611   }
2612 )
2613
2614 ;; Likewise for integer cases, signed and unsigned.
2615 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2616   [(match_operand:<VEL> 0 "register_operand")
2617    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2618                     MAXMINV)]
2619   "TARGET_SIMD"
2620   {
2621     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2622     rtx scratch = gen_reg_rtx (<MODE>mode);
2623     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2624                                                               operands[1]));
2625     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2626     DONE;
2627   }
2628 )
2629
2630 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2631  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2632        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2633                     MAXMINV))]
2634  "TARGET_SIMD"
2635  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2636   [(set_attr "type" "neon_reduc_minmax<q>")]
2637 )
2638
2639 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2640  [(set (match_operand:V2SI 0 "register_operand" "=w")
2641        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2642                     MAXMINV))]
2643  "TARGET_SIMD"
2644  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2645   [(set_attr "type" "neon_reduc_minmax")]
2646 )
2647
2648 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2649  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2650        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2651                       FMAXMINV))]
2652  "TARGET_SIMD"
2653  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2654   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2655 )
2656
2657 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2658 ;; allocation.
2659 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2660 ;; to select.
2661 ;;
2662 ;; Thus our BSL is of the form:
2663 ;;   op0 = bsl (mask, op2, op3)
2664 ;; We can use any of:
2665 ;;
2666 ;;   if (op0 = mask)
2667 ;;     bsl mask, op1, op2
2668 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2669 ;;     bit op0, op2, mask
2670 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2671 ;;     bif op0, op1, mask
2672 ;;
2673 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2674 ;; Some forms of straight-line code may generate the equivalent form
2675 ;; in *aarch64_simd_bsl<mode>_alt.
2676
2677 (define_insn "aarch64_simd_bsl<mode>_internal"
2678   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2679         (xor:VDQ_I
2680            (and:VDQ_I
2681              (xor:VDQ_I
2682                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2683                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2684              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2685           (match_dup:<V_INT_EQUIV> 3)
2686         ))]
2687   "TARGET_SIMD"
2688   "@
2689   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2690   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2691   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2692   [(set_attr "type" "neon_bsl<q>")]
2693 )
2694
2695 ;; We need this form in addition to the above pattern to match the case
2696 ;; when combine tries merging three insns such that the second operand of
2697 ;; the outer XOR matches the second operand of the inner XOR rather than
2698 ;; the first.  The two are equivalent but since recog doesn't try all
2699 ;; permutations of commutative operations, we have to have a separate pattern.
2700
2701 (define_insn "*aarch64_simd_bsl<mode>_alt"
2702   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2703         (xor:VDQ_I
2704            (and:VDQ_I
2705              (xor:VDQ_I
2706                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2707                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2708               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2709           (match_dup:<V_INT_EQUIV> 2)))]
2710   "TARGET_SIMD"
2711   "@
2712   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2713   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2714   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2715   [(set_attr "type" "neon_bsl<q>")]
2716 )
2717
2718 ;; DImode is special, we want to avoid computing operations which are
2719 ;; more naturally computed in general purpose registers in the vector
2720 ;; registers.  If we do that, we need to move all three operands from general
2721 ;; purpose registers to vector registers, then back again.  However, we
2722 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2723 ;; optimizations based on the component operations of a BSL.
2724 ;;
2725 ;; That means we need a splitter back to the individual operations, if they
2726 ;; would be better calculated on the integer side.
2727
2728 (define_insn_and_split "aarch64_simd_bsldi_internal"
2729   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2730         (xor:DI
2731            (and:DI
2732              (xor:DI
2733                (match_operand:DI 3 "register_operand" "w,0,w,r")
2734                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2735              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2736           (match_dup:DI 3)
2737         ))]
2738   "TARGET_SIMD"
2739   "@
2740   bsl\\t%0.8b, %2.8b, %3.8b
2741   bit\\t%0.8b, %2.8b, %1.8b
2742   bif\\t%0.8b, %3.8b, %1.8b
2743   #"
2744   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2745   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2746 {
2747   /* Split back to individual operations.  If we're before reload, and
2748      able to create a temporary register, do so.  If we're after reload,
2749      we've got an early-clobber destination register, so use that.
2750      Otherwise, we can't create pseudos and we can't yet guarantee that
2751      operands[0] is safe to write, so FAIL to split.  */
2752
2753   rtx scratch;
2754   if (reload_completed)
2755     scratch = operands[0];
2756   else if (can_create_pseudo_p ())
2757     scratch = gen_reg_rtx (DImode);
2758   else
2759     FAIL;
2760
2761   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2762   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2763   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2764   DONE;
2765 }
2766   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2767    (set_attr "length" "4,4,4,12")]
2768 )
2769
2770 (define_insn_and_split "aarch64_simd_bsldi_alt"
2771   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2772         (xor:DI
2773            (and:DI
2774              (xor:DI
2775                (match_operand:DI 3 "register_operand" "w,w,0,r")
2776                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2777              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2778           (match_dup:DI 2)
2779         ))]
2780   "TARGET_SIMD"
2781   "@
2782   bsl\\t%0.8b, %3.8b, %2.8b
2783   bit\\t%0.8b, %3.8b, %1.8b
2784   bif\\t%0.8b, %2.8b, %1.8b
2785   #"
2786   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2787   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2788 {
2789   /* Split back to individual operations.  If we're before reload, and
2790      able to create a temporary register, do so.  If we're after reload,
2791      we've got an early-clobber destination register, so use that.
2792      Otherwise, we can't create pseudos and we can't yet guarantee that
2793      operands[0] is safe to write, so FAIL to split.  */
2794
2795   rtx scratch;
2796   if (reload_completed)
2797     scratch = operands[0];
2798   else if (can_create_pseudo_p ())
2799     scratch = gen_reg_rtx (DImode);
2800   else
2801     FAIL;
2802
2803   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2804   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2805   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2806   DONE;
2807 }
2808   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2809    (set_attr "length" "4,4,4,12")]
2810 )
2811
2812 (define_expand "aarch64_simd_bsl<mode>"
2813   [(match_operand:VALLDIF 0 "register_operand")
2814    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2815    (match_operand:VALLDIF 2 "register_operand")
2816    (match_operand:VALLDIF 3 "register_operand")]
2817  "TARGET_SIMD"
2818 {
2819   /* We can't alias operands together if they have different modes.  */
2820   rtx tmp = operands[0];
2821   if (FLOAT_MODE_P (<MODE>mode))
2822     {
2823       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2824       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2825       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2826     }
2827   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2828   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2829                                                          operands[1],
2830                                                          operands[2],
2831                                                          operands[3]));
2832   if (tmp != operands[0])
2833     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2834
2835   DONE;
2836 })
2837
2838 (define_expand "vcond_mask_<mode><v_int_equiv>"
2839   [(match_operand:VALLDI 0 "register_operand")
2840    (match_operand:VALLDI 1 "nonmemory_operand")
2841    (match_operand:VALLDI 2 "nonmemory_operand")
2842    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2843   "TARGET_SIMD"
2844 {
2845   /* If we have (a = (P) ? -1 : 0);
2846      Then we can simply move the generated mask (result must be int).  */
2847   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2848       && operands[2] == CONST0_RTX (<MODE>mode))
2849     emit_move_insn (operands[0], operands[3]);
2850   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2851   else if (operands[1] == CONST0_RTX (<MODE>mode)
2852            && operands[2] == CONSTM1_RTX (<MODE>mode))
2853     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2854   else
2855     {
2856       if (!REG_P (operands[1]))
2857         operands[1] = force_reg (<MODE>mode, operands[1]);
2858       if (!REG_P (operands[2]))
2859         operands[2] = force_reg (<MODE>mode, operands[2]);
2860       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2861                                              operands[1], operands[2]));
2862     }
2863
2864   DONE;
2865 })
2866
2867 ;; Patterns comparing two vectors to produce a mask.
2868
2869 (define_expand "vec_cmp<mode><mode>"
2870   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2871           (match_operator 1 "comparison_operator"
2872             [(match_operand:VSDQ_I_DI 2 "register_operand")
2873              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2874   "TARGET_SIMD"
2875 {
2876   rtx mask = operands[0];
2877   enum rtx_code code = GET_CODE (operands[1]);
2878
2879   switch (code)
2880     {
2881     case NE:
2882     case LE:
2883     case LT:
2884     case GE:
2885     case GT:
2886     case EQ:
2887       if (operands[3] == CONST0_RTX (<MODE>mode))
2888         break;
2889
2890       /* Fall through.  */
2891     default:
2892       if (!REG_P (operands[3]))
2893         operands[3] = force_reg (<MODE>mode, operands[3]);
2894
2895       break;
2896     }
2897
2898   switch (code)
2899     {
2900     case LT:
2901       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2902       break;
2903
2904     case GE:
2905       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2906       break;
2907
2908     case LE:
2909       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2910       break;
2911
2912     case GT:
2913       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2914       break;
2915
2916     case LTU:
2917       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2918       break;
2919
2920     case GEU:
2921       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2922       break;
2923
2924     case LEU:
2925       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2926       break;
2927
2928     case GTU:
2929       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2930       break;
2931
2932     case NE:
2933       /* Handle NE as !EQ.  */
2934       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2935       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2936       break;
2937
2938     case EQ:
2939       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2940       break;
2941
2942     default:
2943       gcc_unreachable ();
2944     }
2945
2946   DONE;
2947 })
2948
2949 (define_expand "vec_cmp<mode><v_int_equiv>"
2950   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2951         (match_operator 1 "comparison_operator"
2952             [(match_operand:VDQF 2 "register_operand")
2953              (match_operand:VDQF 3 "nonmemory_operand")]))]
2954   "TARGET_SIMD"
2955 {
2956   int use_zero_form = 0;
2957   enum rtx_code code = GET_CODE (operands[1]);
2958   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2959
2960   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2961
2962   switch (code)
2963     {
2964     case LE:
2965     case LT:
2966     case GE:
2967     case GT:
2968     case EQ:
2969       if (operands[3] == CONST0_RTX (<MODE>mode))
2970         {
2971           use_zero_form = 1;
2972           break;
2973         }
2974       /* Fall through.  */
2975     default:
2976       if (!REG_P (operands[3]))
2977         operands[3] = force_reg (<MODE>mode, operands[3]);
2978
2979       break;
2980     }
2981
2982   switch (code)
2983     {
2984     case LT:
2985       if (use_zero_form)
2986         {
2987           comparison = gen_aarch64_cmlt<mode>;
2988           break;
2989         }
2990       /* Fall through.  */
2991     case UNLT:
2992       std::swap (operands[2], operands[3]);
2993       /* Fall through.  */
2994     case UNGT:
2995     case GT:
2996       comparison = gen_aarch64_cmgt<mode>;
2997       break;
2998     case LE:
2999       if (use_zero_form)
3000         {
3001           comparison = gen_aarch64_cmle<mode>;
3002           break;
3003         }
3004       /* Fall through.  */
3005     case UNLE:
3006       std::swap (operands[2], operands[3]);
3007       /* Fall through.  */
3008     case UNGE:
3009     case GE:
3010       comparison = gen_aarch64_cmge<mode>;
3011       break;
3012     case NE:
3013     case EQ:
3014       comparison = gen_aarch64_cmeq<mode>;
3015       break;
3016     case UNEQ:
3017     case ORDERED:
3018     case UNORDERED:
3019     case LTGT:
3020       break;
3021     default:
3022       gcc_unreachable ();
3023     }
3024
3025   switch (code)
3026     {
3027     case UNGE:
3028     case UNGT:
3029     case UNLE:
3030     case UNLT:
3031       {
3032         /* All of the above must not raise any FP exceptions.  Thus we first
3033            check each operand for NaNs and force any elements containing NaN to
3034            zero before using them in the compare.
3035            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
3036                                      (cm<cc> (isnan (a) ? 0.0 : a,
3037                                               isnan (b) ? 0.0 : b))
3038            We use the following transformations for doing the comparisions:
3039            a UNGE b -> a GE b
3040            a UNGT b -> a GT b
3041            a UNLE b -> b GE a
3042            a UNLT b -> b GT a.  */
3043
3044         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
3045         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
3046         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
3047         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
3048         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
3049         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
3050         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
3051                                           lowpart_subreg (<V_INT_EQUIV>mode,
3052                                                           operands[2],
3053                                                           <MODE>mode)));
3054         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
3055                                           lowpart_subreg (<V_INT_EQUIV>mode,
3056                                                           operands[3],
3057                                                           <MODE>mode)));
3058         gcc_assert (comparison != NULL);
3059         emit_insn (comparison (operands[0],
3060                                lowpart_subreg (<MODE>mode,
3061                                                tmp0, <V_INT_EQUIV>mode),
3062                                lowpart_subreg (<MODE>mode,
3063                                                tmp1, <V_INT_EQUIV>mode)));
3064         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
3065       }
3066       break;
3067
3068     case LT:
3069     case LE:
3070     case GT:
3071     case GE:
3072     case EQ:
3073     case NE:
3074       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
3075          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
3076          a GE b -> a GE b
3077          a GT b -> a GT b
3078          a LE b -> b GE a
3079          a LT b -> b GT a
3080          a EQ b -> a EQ b
3081          a NE b -> ~(a EQ b)  */
3082       gcc_assert (comparison != NULL);
3083       emit_insn (comparison (operands[0], operands[2], operands[3]));
3084       if (code == NE)
3085         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3086       break;
3087
3088     case LTGT:
3089       /* LTGT is not guranteed to not generate a FP exception.  So let's
3090          go the faster way : ((a > b) || (b > a)).  */
3091       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
3092                                          operands[2], operands[3]));
3093       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
3094       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
3095       break;
3096
3097     case ORDERED:
3098     case UNORDERED:
3099     case UNEQ:
3100       /* cmeq (a, a) & cmeq (b, b).  */
3101       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
3102                                          operands[2], operands[2]));
3103       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
3104       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
3105
3106       if (code == UNORDERED)
3107         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3108       else if (code == UNEQ)
3109         {
3110           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3111           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3112         }
3113       break;
3114
3115     default:
3116       gcc_unreachable ();
3117     }
3118
3119   DONE;
3120 })
3121
3122 (define_expand "vec_cmpu<mode><mode>"
3123   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3124           (match_operator 1 "comparison_operator"
3125             [(match_operand:VSDQ_I_DI 2 "register_operand")
3126              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3127   "TARGET_SIMD"
3128 {
3129   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3130                                       operands[2], operands[3]));
3131   DONE;
3132 })
3133
3134 (define_expand "vcond<mode><mode>"
3135   [(set (match_operand:VALLDI 0 "register_operand")
3136         (if_then_else:VALLDI
3137           (match_operator 3 "comparison_operator"
3138             [(match_operand:VALLDI 4 "register_operand")
3139              (match_operand:VALLDI 5 "nonmemory_operand")])
3140           (match_operand:VALLDI 1 "nonmemory_operand")
3141           (match_operand:VALLDI 2 "nonmemory_operand")))]
3142   "TARGET_SIMD"
3143 {
3144   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3145   enum rtx_code code = GET_CODE (operands[3]);
3146
3147   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3148      it as well as switch operands 1/2 in order to avoid the additional
3149      NOT instruction.  */
3150   if (code == NE)
3151     {
3152       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3153                                     operands[4], operands[5]);
3154       std::swap (operands[1], operands[2]);
3155     }
3156   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3157                                              operands[4], operands[5]));
3158   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3159                                                  operands[2], mask));
3160
3161   DONE;
3162 })
3163
3164 (define_expand "vcond<v_cmp_mixed><mode>"
3165   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3166         (if_then_else:<V_cmp_mixed>
3167           (match_operator 3 "comparison_operator"
3168             [(match_operand:VDQF_COND 4 "register_operand")
3169              (match_operand:VDQF_COND 5 "nonmemory_operand")])
3170           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3171           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3172   "TARGET_SIMD"
3173 {
3174   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3175   enum rtx_code code = GET_CODE (operands[3]);
3176
3177   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3178      it as well as switch operands 1/2 in order to avoid the additional
3179      NOT instruction.  */
3180   if (code == NE)
3181     {
3182       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3183                                     operands[4], operands[5]);
3184       std::swap (operands[1], operands[2]);
3185     }
3186   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3187                                              operands[4], operands[5]));
3188   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3189                                                 operands[0], operands[1],
3190                                                 operands[2], mask));
3191
3192   DONE;
3193 })
3194
3195 (define_expand "vcondu<mode><mode>"
3196   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3197         (if_then_else:VSDQ_I_DI
3198           (match_operator 3 "comparison_operator"
3199             [(match_operand:VSDQ_I_DI 4 "register_operand")
3200              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3201           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3202           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3203   "TARGET_SIMD"
3204 {
3205   rtx mask = gen_reg_rtx (<MODE>mode);
3206   enum rtx_code code = GET_CODE (operands[3]);
3207
3208   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3209      it as well as switch operands 1/2 in order to avoid the additional
3210      NOT instruction.  */
3211   if (code == NE)
3212     {
3213       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3214                                     operands[4], operands[5]);
3215       std::swap (operands[1], operands[2]);
3216     }
3217   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3218                                       operands[4], operands[5]));
3219   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3220                                                  operands[2], mask));
3221   DONE;
3222 })
3223
3224 (define_expand "vcondu<mode><v_cmp_mixed>"
3225   [(set (match_operand:VDQF 0 "register_operand")
3226         (if_then_else:VDQF
3227           (match_operator 3 "comparison_operator"
3228             [(match_operand:<V_cmp_mixed> 4 "register_operand")
3229              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3230           (match_operand:VDQF 1 "nonmemory_operand")
3231           (match_operand:VDQF 2 "nonmemory_operand")))]
3232   "TARGET_SIMD"
3233 {
3234   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3235   enum rtx_code code = GET_CODE (operands[3]);
3236
3237   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3238      it as well as switch operands 1/2 in order to avoid the additional
3239      NOT instruction.  */
3240   if (code == NE)
3241     {
3242       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3243                                     operands[4], operands[5]);
3244       std::swap (operands[1], operands[2]);
3245     }
3246   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3247                                                   mask, operands[3],
3248                                                   operands[4], operands[5]));
3249   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3250                                                  operands[2], mask));
3251   DONE;
3252 })
3253
3254 ;; Patterns for AArch64 SIMD Intrinsics.
3255
3256 ;; Lane extraction with sign extension to general purpose register.
3257 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3258   [(set (match_operand:GPI 0 "register_operand" "=r")
3259         (sign_extend:GPI
3260           (vec_select:<VDQQH:VEL>
3261             (match_operand:VDQQH 1 "register_operand" "w")
3262             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3263   "TARGET_SIMD"
3264   {
3265     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3266                                            INTVAL (operands[2]));
3267     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3268   }
3269   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3270 )
3271
3272 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3273   [(set (match_operand:GPI 0 "register_operand" "=r")
3274         (zero_extend:GPI
3275           (vec_select:<VDQQH:VEL>
3276             (match_operand:VDQQH 1 "register_operand" "w")
3277             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3278   "TARGET_SIMD"
3279   {
3280     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3281                                            INTVAL (operands[2]));
3282     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3283   }
3284   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3285 )
3286
3287 ;; Lane extraction of a value, neither sign nor zero extension
3288 ;; is guaranteed so upper bits should be considered undefined.
3289 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3290 (define_insn "aarch64_get_lane<mode>"
3291   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3292         (vec_select:<VEL>
3293           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3294           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3295   "TARGET_SIMD"
3296   {
3297     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3298     switch (which_alternative)
3299       {
3300         case 0:
3301           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3302         case 1:
3303           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3304         case 2:
3305           return "st1\\t{%1.<Vetype>}[%2], %0";
3306         default:
3307           gcc_unreachable ();
3308       }
3309   }
3310   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3311 )
3312
3313 (define_insn "load_pair_lanes<mode>"
3314   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3315         (vec_concat:<VDBL>
3316            (match_operand:VDC 1 "memory_operand" "Utq")
3317            (match_operand:VDC 2 "memory_operand" "m")))]
3318   "TARGET_SIMD && !STRICT_ALIGNMENT
3319    && rtx_equal_p (XEXP (operands[2], 0),
3320                    plus_constant (Pmode,
3321                                   XEXP (operands[1], 0),
3322                                   GET_MODE_SIZE (<MODE>mode)))"
3323   "ldr\\t%q0, %1"
3324   [(set_attr "type" "neon_load1_1reg_q")]
3325 )
3326
3327 (define_insn "store_pair_lanes<mode>"
3328   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3329         (vec_concat:<VDBL>
3330            (match_operand:VDC 1 "register_operand" "w, r")
3331            (match_operand:VDC 2 "register_operand" "w, r")))]
3332   "TARGET_SIMD"
3333   "@
3334    stp\\t%d1, %d2, %y0
3335    stp\\t%x1, %x2, %y0"
3336   [(set_attr "type" "neon_stp, store_16")]
3337 )
3338
3339 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3340 ;; dest vector.
3341
3342 (define_insn "@aarch64_combinez<mode>"
3343   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3344         (vec_concat:<VDBL>
3345           (match_operand:VDC 1 "general_operand" "w,?r,m")
3346           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3347   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3348   "@
3349    mov\\t%0.8b, %1.8b
3350    fmov\t%d0, %1
3351    ldr\\t%d0, %1"
3352   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3353    (set_attr "arch" "simd,fp,simd")]
3354 )
3355
3356 (define_insn "@aarch64_combinez_be<mode>"
3357   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3358         (vec_concat:<VDBL>
3359           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3360           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3361   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3362   "@
3363    mov\\t%0.8b, %1.8b
3364    fmov\t%d0, %1
3365    ldr\\t%d0, %1"
3366   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3367    (set_attr "arch" "simd,fp,simd")]
3368 )
3369
3370 (define_expand "aarch64_combine<mode>"
3371   [(match_operand:<VDBL> 0 "register_operand")
3372    (match_operand:VDC 1 "register_operand")
3373    (match_operand:VDC 2 "register_operand")]
3374   "TARGET_SIMD"
3375 {
3376   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3377
3378   DONE;
3379 }
3380 )
3381
3382 (define_expand "@aarch64_simd_combine<mode>"
3383   [(match_operand:<VDBL> 0 "register_operand")
3384    (match_operand:VDC 1 "register_operand")
3385    (match_operand:VDC 2 "register_operand")]
3386   "TARGET_SIMD"
3387   {
3388     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3389     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3390     DONE;
3391   }
3392 [(set_attr "type" "multiple")]
3393 )
3394
3395 ;; <su><addsub>l<q>.
3396
3397 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3398  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3399        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3400                            (match_operand:VQW 1 "register_operand" "w")
3401                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3402                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3403                            (match_operand:VQW 2 "register_operand" "w")
3404                            (match_dup 3)))))]
3405   "TARGET_SIMD"
3406   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3407   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3408 )
3409
3410 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3411  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3412        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3413                            (match_operand:VQW 1 "register_operand" "w")
3414                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3415                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3416                            (match_operand:VQW 2 "register_operand" "w")
3417                            (match_dup 3)))))]
3418   "TARGET_SIMD"
3419   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3420   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3421 )
3422
3423
3424 (define_expand "aarch64_saddl2<mode>"
3425   [(match_operand:<VWIDE> 0 "register_operand")
3426    (match_operand:VQW 1 "register_operand")
3427    (match_operand:VQW 2 "register_operand")]
3428   "TARGET_SIMD"
3429 {
3430   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3431   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3432                                                   operands[2], p));
3433   DONE;
3434 })
3435
3436 (define_expand "aarch64_uaddl2<mode>"
3437   [(match_operand:<VWIDE> 0 "register_operand")
3438    (match_operand:VQW 1 "register_operand")
3439    (match_operand:VQW 2 "register_operand")]
3440   "TARGET_SIMD"
3441 {
3442   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3443   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3444                                                   operands[2], p));
3445   DONE;
3446 })
3447
3448 (define_expand "aarch64_ssubl2<mode>"
3449   [(match_operand:<VWIDE> 0 "register_operand")
3450    (match_operand:VQW 1 "register_operand")
3451    (match_operand:VQW 2 "register_operand")]
3452   "TARGET_SIMD"
3453 {
3454   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3455   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3456                                                 operands[2], p));
3457   DONE;
3458 })
3459
3460 (define_expand "aarch64_usubl2<mode>"
3461   [(match_operand:<VWIDE> 0 "register_operand")
3462    (match_operand:VQW 1 "register_operand")
3463    (match_operand:VQW 2 "register_operand")]
3464   "TARGET_SIMD"
3465 {
3466   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3467   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3468                                                 operands[2], p));
3469   DONE;
3470 })
3471
3472 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3473  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3474        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3475                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3476                        (ANY_EXTEND:<VWIDE>
3477                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3478   "TARGET_SIMD"
3479   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3480   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3481 )
3482
3483 ;; <su><addsub>w<q>.
3484
3485 (define_expand "widen_ssum<mode>3"
3486   [(set (match_operand:<VDBLW> 0 "register_operand")
3487         (plus:<VDBLW> (sign_extend:<VDBLW>
3488                         (match_operand:VQW 1 "register_operand"))
3489                       (match_operand:<VDBLW> 2 "register_operand")))]
3490   "TARGET_SIMD"
3491   {
3492     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3493     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3494
3495     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3496                                                 operands[1], p));
3497     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3498     DONE;
3499   }
3500 )
3501
3502 (define_expand "widen_ssum<mode>3"
3503   [(set (match_operand:<VWIDE> 0 "register_operand")
3504         (plus:<VWIDE> (sign_extend:<VWIDE>
3505                         (match_operand:VD_BHSI 1 "register_operand"))
3506                       (match_operand:<VWIDE> 2 "register_operand")))]
3507   "TARGET_SIMD"
3508 {
3509   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3510   DONE;
3511 })
3512
3513 (define_expand "widen_usum<mode>3"
3514   [(set (match_operand:<VDBLW> 0 "register_operand")
3515         (plus:<VDBLW> (zero_extend:<VDBLW>
3516                         (match_operand:VQW 1 "register_operand"))
3517                       (match_operand:<VDBLW> 2 "register_operand")))]
3518   "TARGET_SIMD"
3519   {
3520     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3521     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3522
3523     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3524                                                  operands[1], p));
3525     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3526     DONE;
3527   }
3528 )
3529
3530 (define_expand "widen_usum<mode>3"
3531   [(set (match_operand:<VWIDE> 0 "register_operand")
3532         (plus:<VWIDE> (zero_extend:<VWIDE>
3533                         (match_operand:VD_BHSI 1 "register_operand"))
3534                       (match_operand:<VWIDE> 2 "register_operand")))]
3535   "TARGET_SIMD"
3536 {
3537   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3538   DONE;
3539 })
3540
3541 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3542   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3543         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3544           (ANY_EXTEND:<VWIDE>
3545             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3546   "TARGET_SIMD"
3547   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3548   [(set_attr "type" "neon_sub_widen")]
3549 )
3550
3551 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3552   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3553         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3554           (ANY_EXTEND:<VWIDE>
3555             (vec_select:<VHALF>
3556               (match_operand:VQW 2 "register_operand" "w")
3557               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3558   "TARGET_SIMD"
3559   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3560   [(set_attr "type" "neon_sub_widen")]
3561 )
3562
3563 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3564   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3565         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3566           (ANY_EXTEND:<VWIDE>
3567             (vec_select:<VHALF>
3568               (match_operand:VQW 2 "register_operand" "w")
3569               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3570   "TARGET_SIMD"
3571   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3572   [(set_attr "type" "neon_sub_widen")]
3573 )
3574
3575 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3576   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3577         (plus:<VWIDE>
3578           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3579           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3580   "TARGET_SIMD"
3581   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3582   [(set_attr "type" "neon_add_widen")]
3583 )
3584
3585 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3586   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3587         (plus:<VWIDE>
3588           (ANY_EXTEND:<VWIDE>
3589             (vec_select:<VHALF>
3590               (match_operand:VQW 2 "register_operand" "w")
3591               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3592           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3593   "TARGET_SIMD"
3594   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3595   [(set_attr "type" "neon_add_widen")]
3596 )
3597
3598 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3599   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3600         (plus:<VWIDE>
3601           (ANY_EXTEND:<VWIDE>
3602             (vec_select:<VHALF>
3603               (match_operand:VQW 2 "register_operand" "w")
3604               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3605           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3606   "TARGET_SIMD"
3607   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3608   [(set_attr "type" "neon_add_widen")]
3609 )
3610
3611 (define_expand "aarch64_saddw2<mode>"
3612   [(match_operand:<VWIDE> 0 "register_operand")
3613    (match_operand:<VWIDE> 1 "register_operand")
3614    (match_operand:VQW 2 "register_operand")]
3615   "TARGET_SIMD"
3616 {
3617   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3618   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3619                                                 operands[2], p));
3620   DONE;
3621 })
3622
3623 (define_expand "aarch64_uaddw2<mode>"
3624   [(match_operand:<VWIDE> 0 "register_operand")
3625    (match_operand:<VWIDE> 1 "register_operand")
3626    (match_operand:VQW 2 "register_operand")]
3627   "TARGET_SIMD"
3628 {
3629   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3630   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3631                                                 operands[2], p));
3632   DONE;
3633 })
3634
3635
3636 (define_expand "aarch64_ssubw2<mode>"
3637   [(match_operand:<VWIDE> 0 "register_operand")
3638    (match_operand:<VWIDE> 1 "register_operand")
3639    (match_operand:VQW 2 "register_operand")]
3640   "TARGET_SIMD"
3641 {
3642   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3643   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3644                                                 operands[2], p));
3645   DONE;
3646 })
3647
3648 (define_expand "aarch64_usubw2<mode>"
3649   [(match_operand:<VWIDE> 0 "register_operand")
3650    (match_operand:<VWIDE> 1 "register_operand")
3651    (match_operand:VQW 2 "register_operand")]
3652   "TARGET_SIMD"
3653 {
3654   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3655   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3656                                                 operands[2], p));
3657   DONE;
3658 })
3659
3660 ;; <su><r>h<addsub>.
3661
3662 (define_expand "<u>avg<mode>3_floor"
3663   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3664         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3665                           (match_operand:VDQ_BHSI 2 "register_operand")]
3666                          HADD))]
3667   "TARGET_SIMD"
3668 )
3669
3670 (define_expand "<u>avg<mode>3_ceil"
3671   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3672         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3673                           (match_operand:VDQ_BHSI 2 "register_operand")]
3674                          RHADD))]
3675   "TARGET_SIMD"
3676 )
3677
3678 (define_insn "aarch64_<sur>h<addsub><mode>"
3679   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3680         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3681                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3682                      HADDSUB))]
3683   "TARGET_SIMD"
3684   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3685   [(set_attr "type" "neon_<addsub>_halve<q>")]
3686 )
3687
3688 ;; <r><addsub>hn<q>.
3689
3690 (define_insn "aarch64_<sur><addsub>hn<mode>"
3691   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3692         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3693                             (match_operand:VQN 2 "register_operand" "w")]
3694                            ADDSUBHN))]
3695   "TARGET_SIMD"
3696   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3697   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3698 )
3699
3700 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3701   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3702         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3703                              (match_operand:VQN 2 "register_operand" "w")
3704                              (match_operand:VQN 3 "register_operand" "w")]
3705                             ADDSUBHN2))]
3706   "TARGET_SIMD"
3707   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3708   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3709 )
3710
3711 ;; pmul.
3712
3713 (define_insn "aarch64_pmul<mode>"
3714   [(set (match_operand:VB 0 "register_operand" "=w")
3715         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3716                     (match_operand:VB 2 "register_operand" "w")]
3717                    UNSPEC_PMUL))]
3718  "TARGET_SIMD"
3719  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3720   [(set_attr "type" "neon_mul_<Vetype><q>")]
3721 )
3722
3723 ;; fmulx.
3724
3725 (define_insn "aarch64_fmulx<mode>"
3726   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3727         (unspec:VHSDF_HSDF
3728           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3729            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3730            UNSPEC_FMULX))]
3731  "TARGET_SIMD"
3732  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3733  [(set_attr "type" "neon_fp_mul_<stype>")]
3734 )
3735
3736 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3737
3738 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3739   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3740         (unspec:VDQSF
3741          [(match_operand:VDQSF 1 "register_operand" "w")
3742           (vec_duplicate:VDQSF
3743            (vec_select:<VEL>
3744             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3745             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3746          UNSPEC_FMULX))]
3747   "TARGET_SIMD"
3748   {
3749     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3750     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3751   }
3752   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3753 )
3754
3755 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3756
3757 (define_insn "*aarch64_mulx_elt<mode>"
3758   [(set (match_operand:VDQF 0 "register_operand" "=w")
3759         (unspec:VDQF
3760          [(match_operand:VDQF 1 "register_operand" "w")
3761           (vec_duplicate:VDQF
3762            (vec_select:<VEL>
3763             (match_operand:VDQF 2 "register_operand" "w")
3764             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3765          UNSPEC_FMULX))]
3766   "TARGET_SIMD"
3767   {
3768     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3769     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3770   }
3771   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3772 )
3773
3774 ;; vmulxq_lane
3775
3776 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3777   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3778         (unspec:VHSDF
3779          [(match_operand:VHSDF 1 "register_operand" "w")
3780           (vec_duplicate:VHSDF
3781             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3782          UNSPEC_FMULX))]
3783   "TARGET_SIMD"
3784   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3785   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3786 )
3787
3788 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3789 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3790 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3791
3792 (define_insn "*aarch64_vgetfmulx<mode>"
3793   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3794         (unspec:<VEL>
3795          [(match_operand:<VEL> 1 "register_operand" "w")
3796           (vec_select:<VEL>
3797            (match_operand:VDQF 2 "register_operand" "w")
3798             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3799          UNSPEC_FMULX))]
3800   "TARGET_SIMD"
3801   {
3802     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3803     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3804   }
3805   [(set_attr "type" "fmul<Vetype>")]
3806 )
3807 ;; <su>q<addsub>
3808
3809 (define_insn "aarch64_<su_optab>q<addsub><mode>"
3810   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3811         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3812                         (match_operand:VSDQ_I 2 "register_operand" "w")))]
3813   "TARGET_SIMD"
3814   "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3815   [(set_attr "type" "neon_q<addsub><q>")]
3816 )
3817
3818 ;; suqadd and usqadd
3819
3820 (define_insn "aarch64_<sur>qadd<mode>"
3821   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3822         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3823                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3824                        USSUQADD))]
3825   "TARGET_SIMD"
3826   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3827   [(set_attr "type" "neon_qadd<q>")]
3828 )
3829
3830 ;; sqmovun
3831
3832 (define_insn "aarch64_sqmovun<mode>"
3833   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3834         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3835                             UNSPEC_SQXTUN))]
3836    "TARGET_SIMD"
3837    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3838    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3839 )
3840
3841 ;; sqmovn and uqmovn
3842
3843 (define_insn "aarch64_<sur>qmovn<mode>"
3844   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3845         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3846                             SUQMOVN))]
3847   "TARGET_SIMD"
3848   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3849    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3850 )
3851
3852 ;; <su>q<absneg>
3853
3854 (define_insn "aarch64_s<optab><mode>"
3855   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3856         (UNQOPS:VSDQ_I
3857           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3858   "TARGET_SIMD"
3859   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3860   [(set_attr "type" "neon_<optab><q>")]
3861 )
3862
3863 ;; sq<r>dmulh.
3864
3865 (define_insn "aarch64_sq<r>dmulh<mode>"
3866   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3867         (unspec:VSDQ_HSI
3868           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3869            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3870          VQDMULH))]
3871   "TARGET_SIMD"
3872   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3873   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3874 )
3875
3876 ;; sq<r>dmulh_lane
3877
3878 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3879   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3880         (unspec:VDQHS
3881           [(match_operand:VDQHS 1 "register_operand" "w")
3882            (vec_select:<VEL>
3883              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3884              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3885          VQDMULH))]
3886   "TARGET_SIMD"
3887   "*
3888    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3889    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3890   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3891 )
3892
3893 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3894   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3895         (unspec:VDQHS
3896           [(match_operand:VDQHS 1 "register_operand" "w")
3897            (vec_select:<VEL>
3898              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3899              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3900          VQDMULH))]
3901   "TARGET_SIMD"
3902   "*
3903    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3904    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3905   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3906 )
3907
3908 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3909   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3910         (unspec:SD_HSI
3911           [(match_operand:SD_HSI 1 "register_operand" "w")
3912            (vec_select:<VEL>
3913              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3914              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3915          VQDMULH))]
3916   "TARGET_SIMD"
3917   "*
3918    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3919    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3920   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3921 )
3922
3923 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3924   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3925         (unspec:SD_HSI
3926           [(match_operand:SD_HSI 1 "register_operand" "w")
3927            (vec_select:<VEL>
3928              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3929              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3930          VQDMULH))]
3931   "TARGET_SIMD"
3932   "*
3933    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3934    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3935   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3936 )
3937
3938 ;; sqrdml[as]h.
3939
3940 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3941   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3942         (unspec:VSDQ_HSI
3943           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3944            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3945            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3946           SQRDMLH_AS))]
3947    "TARGET_SIMD_RDMA"
3948    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3949    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3950 )
3951
3952 ;; sqrdml[as]h_lane.
3953
3954 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3955   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3956         (unspec:VDQHS
3957           [(match_operand:VDQHS 1 "register_operand" "0")
3958            (match_operand:VDQHS 2 "register_operand" "w")
3959            (vec_select:<VEL>
3960              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3961              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3962           SQRDMLH_AS))]
3963    "TARGET_SIMD_RDMA"
3964    {
3965      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3966      return
3967       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3968    }
3969    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3970 )
3971
3972 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3973   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3974         (unspec:SD_HSI
3975           [(match_operand:SD_HSI 1 "register_operand" "0")
3976            (match_operand:SD_HSI 2 "register_operand" "w")
3977            (vec_select:<VEL>
3978              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3979              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3980           SQRDMLH_AS))]
3981    "TARGET_SIMD_RDMA"
3982    {
3983      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3984      return
3985       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3986    }
3987    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3988 )
3989
3990 ;; sqrdml[as]h_laneq.
3991
3992 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3993   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3994         (unspec:VDQHS
3995           [(match_operand:VDQHS 1 "register_operand" "0")
3996            (match_operand:VDQHS 2 "register_operand" "w")
3997            (vec_select:<VEL>
3998              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3999              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4000           SQRDMLH_AS))]
4001    "TARGET_SIMD_RDMA"
4002    {
4003      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4004      return
4005       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4006    }
4007    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4008 )
4009
4010 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4011   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4012         (unspec:SD_HSI
4013           [(match_operand:SD_HSI 1 "register_operand" "0")
4014            (match_operand:SD_HSI 2 "register_operand" "w")
4015            (vec_select:<VEL>
4016              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4017              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4018           SQRDMLH_AS))]
4019    "TARGET_SIMD_RDMA"
4020    {
4021      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4022      return
4023       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
4024    }
4025    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4026 )
4027
4028 ;; vqdml[sa]l
4029
4030 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
4031   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4032         (SBINQOPS:<VWIDE>
4033           (match_operand:<VWIDE> 1 "register_operand" "0")
4034           (ss_ashift:<VWIDE>
4035               (mult:<VWIDE>
4036                 (sign_extend:<VWIDE>
4037                       (match_operand:VSD_HSI 2 "register_operand" "w"))
4038                 (sign_extend:<VWIDE>
4039                       (match_operand:VSD_HSI 3 "register_operand" "w")))
4040               (const_int 1))))]
4041   "TARGET_SIMD"
4042   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4043   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4044 )
4045
4046 ;; vqdml[sa]l_lane
4047
4048 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4049   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4050         (SBINQOPS:<VWIDE>
4051           (match_operand:<VWIDE> 1 "register_operand" "0")
4052           (ss_ashift:<VWIDE>
4053             (mult:<VWIDE>
4054               (sign_extend:<VWIDE>
4055                 (match_operand:VD_HSI 2 "register_operand" "w"))
4056               (sign_extend:<VWIDE>
4057                 (vec_duplicate:VD_HSI
4058                   (vec_select:<VEL>
4059                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4060                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4061               ))
4062             (const_int 1))))]
4063   "TARGET_SIMD"
4064   {
4065     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4066     return
4067       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4068   }
4069   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4070 )
4071
4072 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4073   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074         (SBINQOPS:<VWIDE>
4075           (match_operand:<VWIDE> 1 "register_operand" "0")
4076           (ss_ashift:<VWIDE>
4077             (mult:<VWIDE>
4078               (sign_extend:<VWIDE>
4079                 (match_operand:VD_HSI 2 "register_operand" "w"))
4080               (sign_extend:<VWIDE>
4081                 (vec_duplicate:VD_HSI
4082                   (vec_select:<VEL>
4083                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4084                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4085               ))
4086             (const_int 1))))]
4087   "TARGET_SIMD"
4088   {
4089     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4090     return
4091       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4092   }
4093   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4094 )
4095
4096 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4097   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4098         (SBINQOPS:<VWIDE>
4099           (match_operand:<VWIDE> 1 "register_operand" "0")
4100           (ss_ashift:<VWIDE>
4101             (mult:<VWIDE>
4102               (sign_extend:<VWIDE>
4103                 (match_operand:SD_HSI 2 "register_operand" "w"))
4104               (sign_extend:<VWIDE>
4105                 (vec_select:<VEL>
4106                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4107                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4108               )
4109             (const_int 1))))]
4110   "TARGET_SIMD"
4111   {
4112     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4113     return
4114       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4115   }
4116   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4117 )
4118
4119 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4120   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4121         (SBINQOPS:<VWIDE>
4122           (match_operand:<VWIDE> 1 "register_operand" "0")
4123           (ss_ashift:<VWIDE>
4124             (mult:<VWIDE>
4125               (sign_extend:<VWIDE>
4126                 (match_operand:SD_HSI 2 "register_operand" "w"))
4127               (sign_extend:<VWIDE>
4128                 (vec_select:<VEL>
4129                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4130                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4131               )
4132             (const_int 1))))]
4133   "TARGET_SIMD"
4134   {
4135     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4136     return
4137       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4138   }
4139   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4140 )
4141
4142 ;; vqdml[sa]l_n
4143
4144 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4145   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4146         (SBINQOPS:<VWIDE>
4147           (match_operand:<VWIDE> 1 "register_operand" "0")
4148           (ss_ashift:<VWIDE>
4149               (mult:<VWIDE>
4150                 (sign_extend:<VWIDE>
4151                       (match_operand:VD_HSI 2 "register_operand" "w"))
4152                 (sign_extend:<VWIDE>
4153                   (vec_duplicate:VD_HSI
4154                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4155               (const_int 1))))]
4156   "TARGET_SIMD"
4157   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4158   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4159 )
4160
4161 ;; sqdml[as]l2
4162
4163 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4164   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4165         (SBINQOPS:<VWIDE>
4166          (match_operand:<VWIDE> 1 "register_operand" "0")
4167          (ss_ashift:<VWIDE>
4168              (mult:<VWIDE>
4169                (sign_extend:<VWIDE>
4170                  (vec_select:<VHALF>
4171                      (match_operand:VQ_HSI 2 "register_operand" "w")
4172                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4173                (sign_extend:<VWIDE>
4174                  (vec_select:<VHALF>
4175                      (match_operand:VQ_HSI 3 "register_operand" "w")
4176                      (match_dup 4))))
4177              (const_int 1))))]
4178   "TARGET_SIMD"
4179   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4180   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4181 )
4182
4183 (define_expand "aarch64_sqdmlal2<mode>"
4184   [(match_operand:<VWIDE> 0 "register_operand")
4185    (match_operand:<VWIDE> 1 "register_operand")
4186    (match_operand:VQ_HSI 2 "register_operand")
4187    (match_operand:VQ_HSI 3 "register_operand")]
4188   "TARGET_SIMD"
4189 {
4190   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4191   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4192                                                   operands[2], operands[3], p));
4193   DONE;
4194 })
4195
4196 (define_expand "aarch64_sqdmlsl2<mode>"
4197   [(match_operand:<VWIDE> 0 "register_operand")
4198    (match_operand:<VWIDE> 1 "register_operand")
4199    (match_operand:VQ_HSI 2 "register_operand")
4200    (match_operand:VQ_HSI 3 "register_operand")]
4201   "TARGET_SIMD"
4202 {
4203   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4204   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4205                                                   operands[2], operands[3], p));
4206   DONE;
4207 })
4208
4209 ;; vqdml[sa]l2_lane
4210
4211 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4212   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4213         (SBINQOPS:<VWIDE>
4214           (match_operand:<VWIDE> 1 "register_operand" "0")
4215           (ss_ashift:<VWIDE>
4216               (mult:<VWIDE>
4217                 (sign_extend:<VWIDE>
4218                   (vec_select:<VHALF>
4219                     (match_operand:VQ_HSI 2 "register_operand" "w")
4220                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4221                 (sign_extend:<VWIDE>
4222                   (vec_duplicate:<VHALF>
4223                     (vec_select:<VEL>
4224                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4225                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4226                     ))))
4227               (const_int 1))))]
4228   "TARGET_SIMD"
4229   {
4230     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4231     return
4232      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4233   }
4234   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4235 )
4236
4237 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4238   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4239         (SBINQOPS:<VWIDE>
4240           (match_operand:<VWIDE> 1 "register_operand" "0")
4241           (ss_ashift:<VWIDE>
4242               (mult:<VWIDE>
4243                 (sign_extend:<VWIDE>
4244                   (vec_select:<VHALF>
4245                     (match_operand:VQ_HSI 2 "register_operand" "w")
4246                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4247                 (sign_extend:<VWIDE>
4248                   (vec_duplicate:<VHALF>
4249                     (vec_select:<VEL>
4250                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4251                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4252                     ))))
4253               (const_int 1))))]
4254   "TARGET_SIMD"
4255   {
4256     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4257     return
4258      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4259   }
4260   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4261 )
4262
4263 (define_expand "aarch64_sqdmlal2_lane<mode>"
4264   [(match_operand:<VWIDE> 0 "register_operand")
4265    (match_operand:<VWIDE> 1 "register_operand")
4266    (match_operand:VQ_HSI 2 "register_operand")
4267    (match_operand:<VCOND> 3 "register_operand")
4268    (match_operand:SI 4 "immediate_operand")]
4269   "TARGET_SIMD"
4270 {
4271   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4272   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4273                                                        operands[2], operands[3],
4274                                                        operands[4], p));
4275   DONE;
4276 })
4277
4278 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4279   [(match_operand:<VWIDE> 0 "register_operand")
4280    (match_operand:<VWIDE> 1 "register_operand")
4281    (match_operand:VQ_HSI 2 "register_operand")
4282    (match_operand:<VCONQ> 3 "register_operand")
4283    (match_operand:SI 4 "immediate_operand")]
4284   "TARGET_SIMD"
4285 {
4286   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4287   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4288                                                        operands[2], operands[3],
4289                                                        operands[4], p));
4290   DONE;
4291 })
4292
4293 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4294   [(match_operand:<VWIDE> 0 "register_operand")
4295    (match_operand:<VWIDE> 1 "register_operand")
4296    (match_operand:VQ_HSI 2 "register_operand")
4297    (match_operand:<VCOND> 3 "register_operand")
4298    (match_operand:SI 4 "immediate_operand")]
4299   "TARGET_SIMD"
4300 {
4301   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4302   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4303                                                        operands[2], operands[3],
4304                                                        operands[4], p));
4305   DONE;
4306 })
4307
4308 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4309   [(match_operand:<VWIDE> 0 "register_operand")
4310    (match_operand:<VWIDE> 1 "register_operand")
4311    (match_operand:VQ_HSI 2 "register_operand")
4312    (match_operand:<VCONQ> 3 "register_operand")
4313    (match_operand:SI 4 "immediate_operand")]
4314   "TARGET_SIMD"
4315 {
4316   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4317   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4318                                                        operands[2], operands[3],
4319                                                        operands[4], p));
4320   DONE;
4321 })
4322
4323 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4324   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4325         (SBINQOPS:<VWIDE>
4326           (match_operand:<VWIDE> 1 "register_operand" "0")
4327           (ss_ashift:<VWIDE>
4328             (mult:<VWIDE>
4329               (sign_extend:<VWIDE>
4330                 (vec_select:<VHALF>
4331                   (match_operand:VQ_HSI 2 "register_operand" "w")
4332                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4333               (sign_extend:<VWIDE>
4334                 (vec_duplicate:<VHALF>
4335                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4336             (const_int 1))))]
4337   "TARGET_SIMD"
4338   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4339   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4340 )
4341
4342 (define_expand "aarch64_sqdmlal2_n<mode>"
4343   [(match_operand:<VWIDE> 0 "register_operand")
4344    (match_operand:<VWIDE> 1 "register_operand")
4345    (match_operand:VQ_HSI 2 "register_operand")
4346    (match_operand:<VEL> 3 "register_operand")]
4347   "TARGET_SIMD"
4348 {
4349   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4350   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4351                                                     operands[2], operands[3],
4352                                                     p));
4353   DONE;
4354 })
4355
4356 (define_expand "aarch64_sqdmlsl2_n<mode>"
4357   [(match_operand:<VWIDE> 0 "register_operand")
4358    (match_operand:<VWIDE> 1 "register_operand")
4359    (match_operand:VQ_HSI 2 "register_operand")
4360    (match_operand:<VEL> 3 "register_operand")]
4361   "TARGET_SIMD"
4362 {
4363   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4364   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4365                                                     operands[2], operands[3],
4366                                                     p));
4367   DONE;
4368 })
4369
4370 ;; vqdmull
4371
4372 (define_insn "aarch64_sqdmull<mode>"
4373   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4374         (ss_ashift:<VWIDE>
4375              (mult:<VWIDE>
4376                (sign_extend:<VWIDE>
4377                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4378                (sign_extend:<VWIDE>
4379                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4380              (const_int 1)))]
4381   "TARGET_SIMD"
4382   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4383   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4384 )
4385
4386 ;; vqdmull_lane
4387
4388 (define_insn "aarch64_sqdmull_lane<mode>"
4389   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4390         (ss_ashift:<VWIDE>
4391              (mult:<VWIDE>
4392                (sign_extend:<VWIDE>
4393                  (match_operand:VD_HSI 1 "register_operand" "w"))
4394                (sign_extend:<VWIDE>
4395                  (vec_duplicate:VD_HSI
4396                    (vec_select:<VEL>
4397                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4398                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4399                ))
4400              (const_int 1)))]
4401   "TARGET_SIMD"
4402   {
4403     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4404     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4405   }
4406   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4407 )
4408
4409 (define_insn "aarch64_sqdmull_laneq<mode>"
4410   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4411         (ss_ashift:<VWIDE>
4412              (mult:<VWIDE>
4413                (sign_extend:<VWIDE>
4414                  (match_operand:VD_HSI 1 "register_operand" "w"))
4415                (sign_extend:<VWIDE>
4416                  (vec_duplicate:VD_HSI
4417                    (vec_select:<VEL>
4418                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4419                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4420                ))
4421              (const_int 1)))]
4422   "TARGET_SIMD"
4423   {
4424     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4425     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4426   }
4427   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4428 )
4429
4430 (define_insn "aarch64_sqdmull_lane<mode>"
4431   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4432         (ss_ashift:<VWIDE>
4433              (mult:<VWIDE>
4434                (sign_extend:<VWIDE>
4435                  (match_operand:SD_HSI 1 "register_operand" "w"))
4436                (sign_extend:<VWIDE>
4437                  (vec_select:<VEL>
4438                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4439                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4440                ))
4441              (const_int 1)))]
4442   "TARGET_SIMD"
4443   {
4444     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4445     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4446   }
4447   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4448 )
4449
4450 (define_insn "aarch64_sqdmull_laneq<mode>"
4451   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4452         (ss_ashift:<VWIDE>
4453              (mult:<VWIDE>
4454                (sign_extend:<VWIDE>
4455                  (match_operand:SD_HSI 1 "register_operand" "w"))
4456                (sign_extend:<VWIDE>
4457                  (vec_select:<VEL>
4458                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4459                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4460                ))
4461              (const_int 1)))]
4462   "TARGET_SIMD"
4463   {
4464     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4465     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4466   }
4467   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4468 )
4469
4470 ;; vqdmull_n
4471
4472 (define_insn "aarch64_sqdmull_n<mode>"
4473   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4474         (ss_ashift:<VWIDE>
4475              (mult:<VWIDE>
4476                (sign_extend:<VWIDE>
4477                  (match_operand:VD_HSI 1 "register_operand" "w"))
4478                (sign_extend:<VWIDE>
4479                  (vec_duplicate:VD_HSI
4480                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4481                )
4482              (const_int 1)))]
4483   "TARGET_SIMD"
4484   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4485   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4486 )
4487
4488 ;; vqdmull2
4489
4490
4491
4492 (define_insn "aarch64_sqdmull2<mode>_internal"
4493   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4494         (ss_ashift:<VWIDE>
4495              (mult:<VWIDE>
4496                (sign_extend:<VWIDE>
4497                  (vec_select:<VHALF>
4498                    (match_operand:VQ_HSI 1 "register_operand" "w")
4499                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4500                (sign_extend:<VWIDE>
4501                  (vec_select:<VHALF>
4502                    (match_operand:VQ_HSI 2 "register_operand" "w")
4503                    (match_dup 3)))
4504                )
4505              (const_int 1)))]
4506   "TARGET_SIMD"
4507   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4508   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4509 )
4510
4511 (define_expand "aarch64_sqdmull2<mode>"
4512   [(match_operand:<VWIDE> 0 "register_operand")
4513    (match_operand:VQ_HSI 1 "register_operand")
4514    (match_operand:VQ_HSI 2 "register_operand")]
4515   "TARGET_SIMD"
4516 {
4517   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4518   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4519                                                   operands[2], p));
4520   DONE;
4521 })
4522
4523 ;; vqdmull2_lane
4524
4525 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4526   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4527         (ss_ashift:<VWIDE>
4528              (mult:<VWIDE>
4529                (sign_extend:<VWIDE>
4530                  (vec_select:<VHALF>
4531                    (match_operand:VQ_HSI 1 "register_operand" "w")
4532                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4533                (sign_extend:<VWIDE>
4534                  (vec_duplicate:<VHALF>
4535                    (vec_select:<VEL>
4536                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4537                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4538                ))
4539              (const_int 1)))]
4540   "TARGET_SIMD"
4541   {
4542     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4543     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4544   }
4545   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4546 )
4547
4548 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4549   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4550         (ss_ashift:<VWIDE>
4551              (mult:<VWIDE>
4552                (sign_extend:<VWIDE>
4553                  (vec_select:<VHALF>
4554                    (match_operand:VQ_HSI 1 "register_operand" "w")
4555                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4556                (sign_extend:<VWIDE>
4557                  (vec_duplicate:<VHALF>
4558                    (vec_select:<VEL>
4559                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4560                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4561                ))
4562              (const_int 1)))]
4563   "TARGET_SIMD"
4564   {
4565     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4566     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4567   }
4568   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4569 )
4570
4571 (define_expand "aarch64_sqdmull2_lane<mode>"
4572   [(match_operand:<VWIDE> 0 "register_operand")
4573    (match_operand:VQ_HSI 1 "register_operand")
4574    (match_operand:<VCOND> 2 "register_operand")
4575    (match_operand:SI 3 "immediate_operand")]
4576   "TARGET_SIMD"
4577 {
4578   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4579   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4580                                                        operands[2], operands[3],
4581                                                        p));
4582   DONE;
4583 })
4584
4585 (define_expand "aarch64_sqdmull2_laneq<mode>"
4586   [(match_operand:<VWIDE> 0 "register_operand")
4587    (match_operand:VQ_HSI 1 "register_operand")
4588    (match_operand:<VCONQ> 2 "register_operand")
4589    (match_operand:SI 3 "immediate_operand")]
4590   "TARGET_SIMD"
4591 {
4592   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4593   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4594                                                        operands[2], operands[3],
4595                                                        p));
4596   DONE;
4597 })
4598
4599 ;; vqdmull2_n
4600
4601 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4602   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4603         (ss_ashift:<VWIDE>
4604              (mult:<VWIDE>
4605                (sign_extend:<VWIDE>
4606                  (vec_select:<VHALF>
4607                    (match_operand:VQ_HSI 1 "register_operand" "w")
4608                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4609                (sign_extend:<VWIDE>
4610                  (vec_duplicate:<VHALF>
4611                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4612                )
4613              (const_int 1)))]
4614   "TARGET_SIMD"
4615   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4616   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4617 )
4618
4619 (define_expand "aarch64_sqdmull2_n<mode>"
4620   [(match_operand:<VWIDE> 0 "register_operand")
4621    (match_operand:VQ_HSI 1 "register_operand")
4622    (match_operand:<VEL> 2 "register_operand")]
4623   "TARGET_SIMD"
4624 {
4625   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4626   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4627                                                     operands[2], p));
4628   DONE;
4629 })
4630
4631 ;; vshl
4632
4633 (define_insn "aarch64_<sur>shl<mode>"
4634   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4635         (unspec:VSDQ_I_DI
4636           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4637            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4638          VSHL))]
4639   "TARGET_SIMD"
4640   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4641   [(set_attr "type" "neon_shift_reg<q>")]
4642 )
4643
4644
4645 ;; vqshl
4646
4647 (define_insn "aarch64_<sur>q<r>shl<mode>"
4648   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4649         (unspec:VSDQ_I
4650           [(match_operand:VSDQ_I 1 "register_operand" "w")
4651            (match_operand:VSDQ_I 2 "register_operand" "w")]
4652          VQSHL))]
4653   "TARGET_SIMD"
4654   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4655   [(set_attr "type" "neon_sat_shift_reg<q>")]
4656 )
4657
4658 ;; vshll_n
4659
4660 (define_insn "aarch64_<sur>shll_n<mode>"
4661   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4662         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4663                          (match_operand:SI 2
4664                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4665                          VSHLL))]
4666   "TARGET_SIMD"
4667   {
4668     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4669       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4670     else
4671       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4672   }
4673   [(set_attr "type" "neon_shift_imm_long")]
4674 )
4675
4676 ;; vshll_high_n
4677
4678 (define_insn "aarch64_<sur>shll2_n<mode>"
4679   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4680         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4681                          (match_operand:SI 2 "immediate_operand" "i")]
4682                          VSHLL))]
4683   "TARGET_SIMD"
4684   {
4685     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4686       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4687     else
4688       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4689   }
4690   [(set_attr "type" "neon_shift_imm_long")]
4691 )
4692
4693 ;; vrshr_n
4694
4695 (define_insn "aarch64_<sur>shr_n<mode>"
4696   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4697         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4698                            (match_operand:SI 2
4699                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4700                           VRSHR_N))]
4701   "TARGET_SIMD"
4702   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4703   [(set_attr "type" "neon_sat_shift_imm<q>")]
4704 )
4705
4706 ;; v(r)sra_n
4707
4708 (define_insn "aarch64_<sur>sra_n<mode>"
4709   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4710         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4711                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4712                        (match_operand:SI 3
4713                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4714                       VSRA))]
4715   "TARGET_SIMD"
4716   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4717   [(set_attr "type" "neon_shift_acc<q>")]
4718 )
4719
4720 ;; vs<lr>i_n
4721
4722 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4723   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4724         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4725                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4726                        (match_operand:SI 3
4727                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4728                       VSLRI))]
4729   "TARGET_SIMD"
4730   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4731   [(set_attr "type" "neon_shift_imm<q>")]
4732 )
4733
4734 ;; vqshl(u)
4735
4736 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4737   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4738         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4739                        (match_operand:SI 2
4740                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4741                       VQSHL_N))]
4742   "TARGET_SIMD"
4743   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4744   [(set_attr "type" "neon_sat_shift_imm<q>")]
4745 )
4746
4747
4748 ;; vq(r)shr(u)n_n
4749
4750 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4751   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4752         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4753                             (match_operand:SI 2
4754                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4755                            VQSHRN_N))]
4756   "TARGET_SIMD"
4757   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4758   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4759 )
4760
4761
4762 ;; cm(eq|ge|gt|lt|le)
4763 ;; Note, we have constraints for Dz and Z as different expanders
4764 ;; have different ideas of what should be passed to this pattern.
4765
4766 (define_insn "aarch64_cm<optab><mode>"
4767   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4768         (neg:<V_INT_EQUIV>
4769           (COMPARISONS:<V_INT_EQUIV>
4770             (match_operand:VDQ_I 1 "register_operand" "w,w")
4771             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4772           )))]
4773   "TARGET_SIMD"
4774   "@
4775   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4776   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4777   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4778 )
4779
4780 (define_insn_and_split "aarch64_cm<optab>di"
4781   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4782         (neg:DI
4783           (COMPARISONS:DI
4784             (match_operand:DI 1 "register_operand" "w,w,r")
4785             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4786           )))
4787      (clobber (reg:CC CC_REGNUM))]
4788   "TARGET_SIMD"
4789   "#"
4790   "&& reload_completed"
4791   [(set (match_operand:DI 0 "register_operand")
4792         (neg:DI
4793           (COMPARISONS:DI
4794             (match_operand:DI 1 "register_operand")
4795             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4796           )))]
4797   {
4798     /* If we are in the general purpose register file,
4799        we split to a sequence of comparison and store.  */
4800     if (GP_REGNUM_P (REGNO (operands[0]))
4801         && GP_REGNUM_P (REGNO (operands[1])))
4802       {
4803         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4804         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4805         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4806         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4807         DONE;
4808       }
4809     /* Otherwise, we expand to a similar pattern which does not
4810        clobber CC_REGNUM.  */
4811   }
4812   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4813 )
4814
4815 (define_insn "*aarch64_cm<optab>di"
4816   [(set (match_operand:DI 0 "register_operand" "=w,w")
4817         (neg:DI
4818           (COMPARISONS:DI
4819             (match_operand:DI 1 "register_operand" "w,w")
4820             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4821           )))]
4822   "TARGET_SIMD && reload_completed"
4823   "@
4824   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4825   cm<optab>\t%d0, %d1, #0"
4826   [(set_attr "type" "neon_compare, neon_compare_zero")]
4827 )
4828
4829 ;; cm(hs|hi)
4830
4831 (define_insn "aarch64_cm<optab><mode>"
4832   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4833         (neg:<V_INT_EQUIV>
4834           (UCOMPARISONS:<V_INT_EQUIV>
4835             (match_operand:VDQ_I 1 "register_operand" "w")
4836             (match_operand:VDQ_I 2 "register_operand" "w")
4837           )))]
4838   "TARGET_SIMD"
4839   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4840   [(set_attr "type" "neon_compare<q>")]
4841 )
4842
4843 (define_insn_and_split "aarch64_cm<optab>di"
4844   [(set (match_operand:DI 0 "register_operand" "=w,r")
4845         (neg:DI
4846           (UCOMPARISONS:DI
4847             (match_operand:DI 1 "register_operand" "w,r")
4848             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4849           )))
4850     (clobber (reg:CC CC_REGNUM))]
4851   "TARGET_SIMD"
4852   "#"
4853   "&& reload_completed"
4854   [(set (match_operand:DI 0 "register_operand")
4855         (neg:DI
4856           (UCOMPARISONS:DI
4857             (match_operand:DI 1 "register_operand")
4858             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4859           )))]
4860   {
4861     /* If we are in the general purpose register file,
4862        we split to a sequence of comparison and store.  */
4863     if (GP_REGNUM_P (REGNO (operands[0]))
4864         && GP_REGNUM_P (REGNO (operands[1])))
4865       {
4866         machine_mode mode = CCmode;
4867         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4868         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4869         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4870         DONE;
4871       }
4872     /* Otherwise, we expand to a similar pattern which does not
4873        clobber CC_REGNUM.  */
4874   }
4875   [(set_attr "type" "neon_compare,multiple")]
4876 )
4877
4878 (define_insn "*aarch64_cm<optab>di"
4879   [(set (match_operand:DI 0 "register_operand" "=w")
4880         (neg:DI
4881           (UCOMPARISONS:DI
4882             (match_operand:DI 1 "register_operand" "w")
4883             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4884           )))]
4885   "TARGET_SIMD && reload_completed"
4886   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4887   [(set_attr "type" "neon_compare")]
4888 )
4889
4890 ;; cmtst
4891
4892 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4893 ;; we don't have any insns using ne, and aarch64_vcond outputs
4894 ;; not (neg (eq (and x y) 0))
4895 ;; which is rewritten by simplify_rtx as
4896 ;; plus (eq (and x y) 0) -1.
4897
4898 (define_insn "aarch64_cmtst<mode>"
4899   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4900         (plus:<V_INT_EQUIV>
4901           (eq:<V_INT_EQUIV>
4902             (and:VDQ_I
4903               (match_operand:VDQ_I 1 "register_operand" "w")
4904               (match_operand:VDQ_I 2 "register_operand" "w"))
4905             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4906           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4907   ]
4908   "TARGET_SIMD"
4909   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4910   [(set_attr "type" "neon_tst<q>")]
4911 )
4912
4913 (define_insn_and_split "aarch64_cmtstdi"
4914   [(set (match_operand:DI 0 "register_operand" "=w,r")
4915         (neg:DI
4916           (ne:DI
4917             (and:DI
4918               (match_operand:DI 1 "register_operand" "w,r")
4919               (match_operand:DI 2 "register_operand" "w,r"))
4920             (const_int 0))))
4921     (clobber (reg:CC CC_REGNUM))]
4922   "TARGET_SIMD"
4923   "#"
4924   "&& reload_completed"
4925   [(set (match_operand:DI 0 "register_operand")
4926         (neg:DI
4927           (ne:DI
4928             (and:DI
4929               (match_operand:DI 1 "register_operand")
4930               (match_operand:DI 2 "register_operand"))
4931             (const_int 0))))]
4932   {
4933     /* If we are in the general purpose register file,
4934        we split to a sequence of comparison and store.  */
4935     if (GP_REGNUM_P (REGNO (operands[0]))
4936         && GP_REGNUM_P (REGNO (operands[1])))
4937       {
4938         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4939         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4940         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4941         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4942         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4943         DONE;
4944       }
4945     /* Otherwise, we expand to a similar pattern which does not
4946        clobber CC_REGNUM.  */
4947   }
4948   [(set_attr "type" "neon_tst,multiple")]
4949 )
4950
4951 (define_insn "*aarch64_cmtstdi"
4952   [(set (match_operand:DI 0 "register_operand" "=w")
4953         (neg:DI
4954           (ne:DI
4955             (and:DI
4956               (match_operand:DI 1 "register_operand" "w")
4957               (match_operand:DI 2 "register_operand" "w"))
4958             (const_int 0))))]
4959   "TARGET_SIMD"
4960   "cmtst\t%d0, %d1, %d2"
4961   [(set_attr "type" "neon_tst")]
4962 )
4963
4964 ;; fcm(eq|ge|gt|le|lt)
4965
4966 (define_insn "aarch64_cm<optab><mode>"
4967   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4968         (neg:<V_INT_EQUIV>
4969           (COMPARISONS:<V_INT_EQUIV>
4970             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4971             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4972           )))]
4973   "TARGET_SIMD"
4974   "@
4975   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4976   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4977   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4978 )
4979
4980 ;; fac(ge|gt)
4981 ;; Note we can also handle what would be fac(le|lt) by
4982 ;; generating fac(ge|gt).
4983
4984 (define_insn "aarch64_fac<optab><mode>"
4985   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4986         (neg:<V_INT_EQUIV>
4987           (FAC_COMPARISONS:<V_INT_EQUIV>
4988             (abs:VHSDF_HSDF
4989               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4990             (abs:VHSDF_HSDF
4991               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4992   )))]
4993   "TARGET_SIMD"
4994   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4995   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4996 )
4997
4998 ;; addp
4999
5000 (define_insn "aarch64_addp<mode>"
5001   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
5002         (unspec:VD_BHSI
5003           [(match_operand:VD_BHSI 1 "register_operand" "w")
5004            (match_operand:VD_BHSI 2 "register_operand" "w")]
5005           UNSPEC_ADDP))]
5006   "TARGET_SIMD"
5007   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5008   [(set_attr "type" "neon_reduc_add<q>")]
5009 )
5010
5011 (define_insn "aarch64_addpdi"
5012   [(set (match_operand:DI 0 "register_operand" "=w")
5013         (unspec:DI
5014           [(match_operand:V2DI 1 "register_operand" "w")]
5015           UNSPEC_ADDP))]
5016   "TARGET_SIMD"
5017   "addp\t%d0, %1.2d"
5018   [(set_attr "type" "neon_reduc_add")]
5019 )
5020
5021 ;; sqrt
5022
5023 (define_expand "sqrt<mode>2"
5024   [(set (match_operand:VHSDF 0 "register_operand")
5025         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
5026   "TARGET_SIMD"
5027 {
5028   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
5029     DONE;
5030 })
5031
5032 (define_insn "*sqrt<mode>2"
5033   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5034         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
5035   "TARGET_SIMD"
5036   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
5037   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
5038 )
5039
5040 ;; Patterns for vector struct loads and stores.
5041
5042 (define_insn "aarch64_simd_ld2<mode>"
5043   [(set (match_operand:OI 0 "register_operand" "=w")
5044         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5045                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5046                    UNSPEC_LD2))]
5047   "TARGET_SIMD"
5048   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5049   [(set_attr "type" "neon_load2_2reg<q>")]
5050 )
5051
5052 (define_insn "aarch64_simd_ld2r<mode>"
5053   [(set (match_operand:OI 0 "register_operand" "=w")
5054        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5055                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5056                   UNSPEC_LD2_DUP))]
5057   "TARGET_SIMD"
5058   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5059   [(set_attr "type" "neon_load2_all_lanes<q>")]
5060 )
5061
5062 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
5063   [(set (match_operand:OI 0 "register_operand" "=w")
5064         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5065                     (match_operand:OI 2 "register_operand" "0")
5066                     (match_operand:SI 3 "immediate_operand" "i")
5067                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5068                    UNSPEC_LD2_LANE))]
5069   "TARGET_SIMD"
5070   {
5071     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5072     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
5073   }
5074   [(set_attr "type" "neon_load2_one_lane")]
5075 )
5076
5077 (define_expand "vec_load_lanesoi<mode>"
5078   [(set (match_operand:OI 0 "register_operand")
5079         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
5080                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5081                    UNSPEC_LD2))]
5082   "TARGET_SIMD"
5083 {
5084   if (BYTES_BIG_ENDIAN)
5085     {
5086       rtx tmp = gen_reg_rtx (OImode);
5087       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5088       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
5089       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
5090     }
5091   else
5092     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
5093   DONE;
5094 })
5095
5096 (define_insn "aarch64_simd_st2<mode>"
5097   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5098         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5099                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5100                    UNSPEC_ST2))]
5101   "TARGET_SIMD"
5102   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5103   [(set_attr "type" "neon_store2_2reg<q>")]
5104 )
5105
5106 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5107 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
5108   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5109         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5110                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5111                     (match_operand:SI 2 "immediate_operand" "i")]
5112                    UNSPEC_ST2_LANE))]
5113   "TARGET_SIMD"
5114   {
5115     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5116     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5117   }
5118   [(set_attr "type" "neon_store2_one_lane<q>")]
5119 )
5120
5121 (define_expand "vec_store_lanesoi<mode>"
5122   [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5123         (unspec:OI [(match_operand:OI 1 "register_operand")
5124                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5125                    UNSPEC_ST2))]
5126   "TARGET_SIMD"
5127 {
5128   if (BYTES_BIG_ENDIAN)
5129     {
5130       rtx tmp = gen_reg_rtx (OImode);
5131       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5132       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5133       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5134     }
5135   else
5136     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5137   DONE;
5138 })
5139
5140 (define_insn "aarch64_simd_ld3<mode>"
5141   [(set (match_operand:CI 0 "register_operand" "=w")
5142         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5143                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5144                    UNSPEC_LD3))]
5145   "TARGET_SIMD"
5146   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5147   [(set_attr "type" "neon_load3_3reg<q>")]
5148 )
5149
5150 (define_insn "aarch64_simd_ld3r<mode>"
5151   [(set (match_operand:CI 0 "register_operand" "=w")
5152        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5153                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5154                   UNSPEC_LD3_DUP))]
5155   "TARGET_SIMD"
5156   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5157   [(set_attr "type" "neon_load3_all_lanes<q>")]
5158 )
5159
5160 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5161   [(set (match_operand:CI 0 "register_operand" "=w")
5162         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5163                     (match_operand:CI 2 "register_operand" "0")
5164                     (match_operand:SI 3 "immediate_operand" "i")
5165                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5166                    UNSPEC_LD3_LANE))]
5167   "TARGET_SIMD"
5168 {
5169     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5170     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5171 }
5172   [(set_attr "type" "neon_load3_one_lane")]
5173 )
5174
5175 (define_expand "vec_load_lanesci<mode>"
5176   [(set (match_operand:CI 0 "register_operand")
5177         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5178                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5179                    UNSPEC_LD3))]
5180   "TARGET_SIMD"
5181 {
5182   if (BYTES_BIG_ENDIAN)
5183     {
5184       rtx tmp = gen_reg_rtx (CImode);
5185       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5186       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5187       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5188     }
5189   else
5190     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5191   DONE;
5192 })
5193
5194 (define_insn "aarch64_simd_st3<mode>"
5195   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5196         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5197                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5198                    UNSPEC_ST3))]
5199   "TARGET_SIMD"
5200   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5201   [(set_attr "type" "neon_store3_3reg<q>")]
5202 )
5203
5204 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5205 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5206   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5207         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5208                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5209                      (match_operand:SI 2 "immediate_operand" "i")]
5210                     UNSPEC_ST3_LANE))]
5211   "TARGET_SIMD"
5212   {
5213     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5214     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5215   }
5216   [(set_attr "type" "neon_store3_one_lane<q>")]
5217 )
5218
5219 (define_expand "vec_store_lanesci<mode>"
5220   [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5221         (unspec:CI [(match_operand:CI 1 "register_operand")
5222                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5223                    UNSPEC_ST3))]
5224   "TARGET_SIMD"
5225 {
5226   if (BYTES_BIG_ENDIAN)
5227     {
5228       rtx tmp = gen_reg_rtx (CImode);
5229       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5230       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5231       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5232     }
5233   else
5234     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5235   DONE;
5236 })
5237
5238 (define_insn "aarch64_simd_ld4<mode>"
5239   [(set (match_operand:XI 0 "register_operand" "=w")
5240         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5241                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5242                    UNSPEC_LD4))]
5243   "TARGET_SIMD"
5244   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5245   [(set_attr "type" "neon_load4_4reg<q>")]
5246 )
5247
5248 (define_insn "aarch64_simd_ld4r<mode>"
5249   [(set (match_operand:XI 0 "register_operand" "=w")
5250        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5251                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5252                   UNSPEC_LD4_DUP))]
5253   "TARGET_SIMD"
5254   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5255   [(set_attr "type" "neon_load4_all_lanes<q>")]
5256 )
5257
5258 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5259   [(set (match_operand:XI 0 "register_operand" "=w")
5260         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5261                     (match_operand:XI 2 "register_operand" "0")
5262                     (match_operand:SI 3 "immediate_operand" "i")
5263                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5264                    UNSPEC_LD4_LANE))]
5265   "TARGET_SIMD"
5266 {
5267     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5268     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5269 }
5270   [(set_attr "type" "neon_load4_one_lane")]
5271 )
5272
5273 (define_expand "vec_load_lanesxi<mode>"
5274   [(set (match_operand:XI 0 "register_operand")
5275         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5276                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5277                    UNSPEC_LD4))]
5278   "TARGET_SIMD"
5279 {
5280   if (BYTES_BIG_ENDIAN)
5281     {
5282       rtx tmp = gen_reg_rtx (XImode);
5283       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5284       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5285       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5286     }
5287   else
5288     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5289   DONE;
5290 })
5291
5292 (define_insn "aarch64_simd_st4<mode>"
5293   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5294         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5295                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5296                    UNSPEC_ST4))]
5297   "TARGET_SIMD"
5298   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5299   [(set_attr "type" "neon_store4_4reg<q>")]
5300 )
5301
5302 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5303 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5304   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5305         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5306                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5307                      (match_operand:SI 2 "immediate_operand" "i")]
5308                     UNSPEC_ST4_LANE))]
5309   "TARGET_SIMD"
5310   {
5311     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5312     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5313   }
5314   [(set_attr "type" "neon_store4_one_lane<q>")]
5315 )
5316
5317 (define_expand "vec_store_lanesxi<mode>"
5318   [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5319         (unspec:XI [(match_operand:XI 1 "register_operand")
5320                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5321                    UNSPEC_ST4))]
5322   "TARGET_SIMD"
5323 {
5324   if (BYTES_BIG_ENDIAN)
5325     {
5326       rtx tmp = gen_reg_rtx (XImode);
5327       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5328       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5329       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5330     }
5331   else
5332     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5333   DONE;
5334 })
5335
5336 (define_insn_and_split "aarch64_rev_reglist<mode>"
5337 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5338         (unspec:VSTRUCT
5339                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5340                     (match_operand:V16QI 2 "register_operand" "w")]
5341                    UNSPEC_REV_REGLIST))]
5342   "TARGET_SIMD"
5343   "#"
5344   "&& reload_completed"
5345   [(const_int 0)]
5346 {
5347   int i;
5348   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5349   for (i = 0; i < nregs; i++)
5350     {
5351       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5352       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5353       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5354     }
5355   DONE;
5356 }
5357   [(set_attr "type" "neon_tbl1_q")
5358    (set_attr "length" "<insn_count>")]
5359 )
5360
5361 ;; Reload patterns for AdvSIMD register list operands.
5362
5363 (define_expand "mov<mode>"
5364   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5365         (match_operand:VSTRUCT 1 "general_operand"))]
5366   "TARGET_SIMD"
5367 {
5368   if (can_create_pseudo_p ())
5369     {
5370       if (GET_CODE (operands[0]) != REG)
5371         operands[1] = force_reg (<MODE>mode, operands[1]);
5372     }
5373 })
5374
5375
5376 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5377   [(match_operand:CI 0 "register_operand")
5378    (match_operand:DI 1 "register_operand")
5379    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5380   "TARGET_SIMD"
5381 {
5382   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5383   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5384   DONE;
5385 })
5386
5387 (define_insn "aarch64_ld1_x3_<mode>"
5388   [(set (match_operand:CI 0 "register_operand" "=w")
5389         (unspec:CI
5390           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5391            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5392   "TARGET_SIMD"
5393   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5394   [(set_attr "type" "neon_load1_3reg<q>")]
5395 )
5396
5397 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5398   [(match_operand:XI 0 "register_operand" "=w")
5399    (match_operand:DI 1 "register_operand" "r")
5400    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5401   "TARGET_SIMD"
5402 {
5403   rtx mem = gen_rtx_MEM (XImode, operands[1]);
5404   emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5405   DONE;
5406 })
5407
5408 (define_insn "aarch64_ld1_x4_<mode>"
5409   [(set (match_operand:XI 0 "register_operand" "=w")
5410         (unspec:XI
5411           [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5412            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5413         UNSPEC_LD1))]
5414   "TARGET_SIMD"
5415   "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5416   [(set_attr "type" "neon_load1_4reg<q>")]
5417 )
5418
5419 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5420   [(match_operand:DI 0 "register_operand")
5421    (match_operand:OI 1 "register_operand")
5422    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5423   "TARGET_SIMD"
5424 {
5425   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5426   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5427   DONE;
5428 })
5429
5430 (define_insn "aarch64_st1_x2_<mode>"
5431    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5432          (unspec:OI
5433           [(match_operand:OI 1 "register_operand" "w")
5434           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5435   "TARGET_SIMD"
5436   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5437   [(set_attr "type" "neon_store1_2reg<q>")]
5438 )
5439
5440 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5441   [(match_operand:DI 0 "register_operand")
5442    (match_operand:CI 1 "register_operand")
5443    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5444   "TARGET_SIMD"
5445 {
5446   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5447   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5448   DONE;
5449 })
5450
5451 (define_insn "aarch64_st1_x3_<mode>"
5452    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5453         (unspec:CI
5454          [(match_operand:CI 1 "register_operand" "w")
5455           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5456   "TARGET_SIMD"
5457   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5458   [(set_attr "type" "neon_store1_3reg<q>")]
5459 )
5460
5461 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5462   [(match_operand:DI 0 "register_operand" "")
5463    (match_operand:XI 1 "register_operand" "")
5464    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5465   "TARGET_SIMD"
5466 {
5467   rtx mem = gen_rtx_MEM (XImode, operands[0]);
5468   emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5469   DONE;
5470 })
5471
5472 (define_insn "aarch64_st1_x4_<mode>"
5473   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5474         (unspec:XI
5475            [(match_operand:XI 1 "register_operand" "w")
5476            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5477         UNSPEC_ST1))]
5478   "TARGET_SIMD"
5479   "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5480   [(set_attr "type" "neon_store1_4reg<q>")]
5481 )
5482
5483 (define_insn "*aarch64_mov<mode>"
5484   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5485         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5486   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5487    && (register_operand (operands[0], <MODE>mode)
5488        || register_operand (operands[1], <MODE>mode))"
5489   "@
5490    #
5491    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5492    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5493   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5494                      neon_load<nregs>_<nregs>reg_q")
5495    (set_attr "length" "<insn_count>,4,4")]
5496 )
5497
5498 (define_insn "aarch64_be_ld1<mode>"
5499   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5500         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5501                              "aarch64_simd_struct_operand" "Utv")]
5502         UNSPEC_LD1))]
5503   "TARGET_SIMD"
5504   "ld1\\t{%0<Vmtype>}, %1"
5505   [(set_attr "type" "neon_load1_1reg<q>")]
5506 )
5507
5508 (define_insn "aarch64_be_st1<mode>"
5509   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5510         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5511         UNSPEC_ST1))]
5512   "TARGET_SIMD"
5513   "st1\\t{%1<Vmtype>}, %0"
5514   [(set_attr "type" "neon_store1_1reg<q>")]
5515 )
5516
5517 (define_insn "*aarch64_be_movoi"
5518   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5519         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5520   "TARGET_SIMD && BYTES_BIG_ENDIAN
5521    && (register_operand (operands[0], OImode)
5522        || register_operand (operands[1], OImode))"
5523   "@
5524    #
5525    stp\\t%q1, %R1, %0
5526    ldp\\t%q0, %R0, %1"
5527   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5528    (set_attr "length" "8,4,4")]
5529 )
5530
5531 (define_insn "*aarch64_be_movci"
5532   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5533         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5534   "TARGET_SIMD && BYTES_BIG_ENDIAN
5535    && (register_operand (operands[0], CImode)
5536        || register_operand (operands[1], CImode))"
5537   "#"
5538   [(set_attr "type" "multiple")
5539    (set_attr "length" "12,4,4")]
5540 )
5541
5542 (define_insn "*aarch64_be_movxi"
5543   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5544         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5545   "TARGET_SIMD && BYTES_BIG_ENDIAN
5546    && (register_operand (operands[0], XImode)
5547        || register_operand (operands[1], XImode))"
5548   "#"
5549   [(set_attr "type" "multiple")
5550    (set_attr "length" "16,4,4")]
5551 )
5552
5553 (define_split
5554   [(set (match_operand:OI 0 "register_operand")
5555         (match_operand:OI 1 "register_operand"))]
5556   "TARGET_SIMD && reload_completed"
5557   [(const_int 0)]
5558 {
5559   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5560   DONE;
5561 })
5562
5563 (define_split
5564   [(set (match_operand:CI 0 "nonimmediate_operand")
5565         (match_operand:CI 1 "general_operand"))]
5566   "TARGET_SIMD && reload_completed"
5567   [(const_int 0)]
5568 {
5569   if (register_operand (operands[0], CImode)
5570       && register_operand (operands[1], CImode))
5571     {
5572       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5573       DONE;
5574     }
5575   else if (BYTES_BIG_ENDIAN)
5576     {
5577       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5578                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5579       emit_move_insn (gen_lowpart (V16QImode,
5580                                    simplify_gen_subreg (TImode, operands[0],
5581                                                         CImode, 32)),
5582                       gen_lowpart (V16QImode,
5583                                    simplify_gen_subreg (TImode, operands[1],
5584                                                         CImode, 32)));
5585       DONE;
5586     }
5587   else
5588     FAIL;
5589 })
5590
5591 (define_split
5592   [(set (match_operand:XI 0 "nonimmediate_operand")
5593         (match_operand:XI 1 "general_operand"))]
5594   "TARGET_SIMD && reload_completed"
5595   [(const_int 0)]
5596 {
5597   if (register_operand (operands[0], XImode)
5598       && register_operand (operands[1], XImode))
5599     {
5600       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5601       DONE;
5602     }
5603   else if (BYTES_BIG_ENDIAN)
5604     {
5605       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5606                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5607       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5608                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5609       DONE;
5610     }
5611   else
5612     FAIL;
5613 })
5614
5615 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5616   [(match_operand:VSTRUCT 0 "register_operand")
5617    (match_operand:DI 1 "register_operand")
5618    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5619   "TARGET_SIMD"
5620 {
5621   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5622   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5623                      * <VSTRUCT:nregs>);
5624
5625   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5626                                                                 mem));
5627   DONE;
5628 })
5629
5630 (define_insn "aarch64_ld2<mode>_dreg"
5631   [(set (match_operand:OI 0 "register_operand" "=w")
5632         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5633                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5634                    UNSPEC_LD2_DREG))]
5635   "TARGET_SIMD"
5636   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5637   [(set_attr "type" "neon_load2_2reg<q>")]
5638 )
5639
5640 (define_insn "aarch64_ld2<mode>_dreg"
5641   [(set (match_operand:OI 0 "register_operand" "=w")
5642         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5643                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5644                    UNSPEC_LD2_DREG))]
5645   "TARGET_SIMD"
5646   "ld1\\t{%S0.1d - %T0.1d}, %1"
5647   [(set_attr "type" "neon_load1_2reg<q>")]
5648 )
5649
5650 (define_insn "aarch64_ld3<mode>_dreg"
5651   [(set (match_operand:CI 0 "register_operand" "=w")
5652         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5653                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5654                    UNSPEC_LD3_DREG))]
5655   "TARGET_SIMD"
5656   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5657   [(set_attr "type" "neon_load3_3reg<q>")]
5658 )
5659
5660 (define_insn "aarch64_ld3<mode>_dreg"
5661   [(set (match_operand:CI 0 "register_operand" "=w")
5662         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5663                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5664                    UNSPEC_LD3_DREG))]
5665   "TARGET_SIMD"
5666   "ld1\\t{%S0.1d - %U0.1d}, %1"
5667   [(set_attr "type" "neon_load1_3reg<q>")]
5668 )
5669
5670 (define_insn "aarch64_ld4<mode>_dreg"
5671   [(set (match_operand:XI 0 "register_operand" "=w")
5672         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5673                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5674                    UNSPEC_LD4_DREG))]
5675   "TARGET_SIMD"
5676   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5677   [(set_attr "type" "neon_load4_4reg<q>")]
5678 )
5679
5680 (define_insn "aarch64_ld4<mode>_dreg"
5681   [(set (match_operand:XI 0 "register_operand" "=w")
5682         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5683                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5684                    UNSPEC_LD4_DREG))]
5685   "TARGET_SIMD"
5686   "ld1\\t{%S0.1d - %V0.1d}, %1"
5687   [(set_attr "type" "neon_load1_4reg<q>")]
5688 )
5689
5690 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5691  [(match_operand:VSTRUCT 0 "register_operand")
5692   (match_operand:DI 1 "register_operand")
5693   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5694   "TARGET_SIMD"
5695 {
5696   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5697   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5698
5699   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5700   DONE;
5701 })
5702
5703 (define_expand "aarch64_ld1<VALL_F16:mode>"
5704  [(match_operand:VALL_F16 0 "register_operand")
5705   (match_operand:DI 1 "register_operand")]
5706   "TARGET_SIMD"
5707 {
5708   machine_mode mode = <VALL_F16:MODE>mode;
5709   rtx mem = gen_rtx_MEM (mode, operands[1]);
5710
5711   if (BYTES_BIG_ENDIAN)
5712     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5713   else
5714     emit_move_insn (operands[0], mem);
5715   DONE;
5716 })
5717
5718 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5719  [(match_operand:VSTRUCT 0 "register_operand")
5720   (match_operand:DI 1 "register_operand")
5721   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5722   "TARGET_SIMD"
5723 {
5724   machine_mode mode = <VSTRUCT:MODE>mode;
5725   rtx mem = gen_rtx_MEM (mode, operands[1]);
5726
5727   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5728   DONE;
5729 })
5730
5731 (define_expand "aarch64_ld1x2<VQ:mode>"
5732  [(match_operand:OI 0 "register_operand")
5733   (match_operand:DI 1 "register_operand")
5734   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5735   "TARGET_SIMD"
5736 {
5737   machine_mode mode = OImode;
5738   rtx mem = gen_rtx_MEM (mode, operands[1]);
5739
5740   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5741   DONE;
5742 })
5743
5744 (define_expand "aarch64_ld1x2<VDC:mode>"
5745  [(match_operand:OI 0 "register_operand")
5746   (match_operand:DI 1 "register_operand")
5747   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5748   "TARGET_SIMD"
5749 {
5750   machine_mode mode = OImode;
5751   rtx mem = gen_rtx_MEM (mode, operands[1]);
5752
5753   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5754   DONE;
5755 })
5756
5757
5758 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5759   [(match_operand:VSTRUCT 0 "register_operand")
5760         (match_operand:DI 1 "register_operand")
5761         (match_operand:VSTRUCT 2 "register_operand")
5762         (match_operand:SI 3 "immediate_operand")
5763         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5764   "TARGET_SIMD"
5765 {
5766   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5767   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5768                      * <VSTRUCT:nregs>);
5769
5770   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5771   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5772         operands[0], mem, operands[2], operands[3]));
5773   DONE;
5774 })
5775
5776 ;; Expanders for builtins to extract vector registers from large
5777 ;; opaque integer modes.
5778
5779 ;; D-register list.
5780
5781 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5782  [(match_operand:VDC 0 "register_operand")
5783   (match_operand:VSTRUCT 1 "register_operand")
5784   (match_operand:SI 2 "immediate_operand")]
5785   "TARGET_SIMD"
5786 {
5787   int part = INTVAL (operands[2]);
5788   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5789   int offset = part * 16;
5790
5791   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5792   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5793   DONE;
5794 })
5795
5796 ;; Q-register list.
5797
5798 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5799  [(match_operand:VQ 0 "register_operand")
5800   (match_operand:VSTRUCT 1 "register_operand")
5801   (match_operand:SI 2 "immediate_operand")]
5802   "TARGET_SIMD"
5803 {
5804   int part = INTVAL (operands[2]);
5805   int offset = part * 16;
5806
5807   emit_move_insn (operands[0],
5808                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5809   DONE;
5810 })
5811
5812 ;; Permuted-store expanders for neon intrinsics.
5813
5814 ;; Permute instructions
5815
5816 ;; vec_perm support
5817
5818 (define_expand "vec_perm<mode>"
5819   [(match_operand:VB 0 "register_operand")
5820    (match_operand:VB 1 "register_operand")
5821    (match_operand:VB 2 "register_operand")
5822    (match_operand:VB 3 "register_operand")]
5823   "TARGET_SIMD"
5824 {
5825   aarch64_expand_vec_perm (operands[0], operands[1],
5826                            operands[2], operands[3], <nunits>);
5827   DONE;
5828 })
5829
5830 (define_insn "aarch64_tbl1<mode>"
5831   [(set (match_operand:VB 0 "register_operand" "=w")
5832         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5833                     (match_operand:VB 2 "register_operand" "w")]
5834                    UNSPEC_TBL))]
5835   "TARGET_SIMD"
5836   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5837   [(set_attr "type" "neon_tbl1<q>")]
5838 )
5839
5840 ;; Two source registers.
5841
5842 (define_insn "aarch64_tbl2v16qi"
5843   [(set (match_operand:V16QI 0 "register_operand" "=w")
5844         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5845                        (match_operand:V16QI 2 "register_operand" "w")]
5846                       UNSPEC_TBL))]
5847   "TARGET_SIMD"
5848   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5849   [(set_attr "type" "neon_tbl2_q")]
5850 )
5851
5852 (define_insn "aarch64_tbl3<mode>"
5853   [(set (match_operand:VB 0 "register_operand" "=w")
5854         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5855                       (match_operand:VB 2 "register_operand" "w")]
5856                       UNSPEC_TBL))]
5857   "TARGET_SIMD"
5858   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5859   [(set_attr "type" "neon_tbl3")]
5860 )
5861
5862 (define_insn "aarch64_tbx4<mode>"
5863   [(set (match_operand:VB 0 "register_operand" "=w")
5864         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5865                       (match_operand:OI 2 "register_operand" "w")
5866                       (match_operand:VB 3 "register_operand" "w")]
5867                       UNSPEC_TBX))]
5868   "TARGET_SIMD"
5869   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5870   [(set_attr "type" "neon_tbl4")]
5871 )
5872
5873 ;; Three source registers.
5874
5875 (define_insn "aarch64_qtbl3<mode>"
5876   [(set (match_operand:VB 0 "register_operand" "=w")
5877         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5878                       (match_operand:VB 2 "register_operand" "w")]
5879                       UNSPEC_TBL))]
5880   "TARGET_SIMD"
5881   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5882   [(set_attr "type" "neon_tbl3")]
5883 )
5884
5885 (define_insn "aarch64_qtbx3<mode>"
5886   [(set (match_operand:VB 0 "register_operand" "=w")
5887         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5888                       (match_operand:CI 2 "register_operand" "w")
5889                       (match_operand:VB 3 "register_operand" "w")]
5890                       UNSPEC_TBX))]
5891   "TARGET_SIMD"
5892   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5893   [(set_attr "type" "neon_tbl3")]
5894 )
5895
5896 ;; Four source registers.
5897
5898 (define_insn "aarch64_qtbl4<mode>"
5899   [(set (match_operand:VB 0 "register_operand" "=w")
5900         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5901                       (match_operand:VB 2 "register_operand" "w")]
5902                       UNSPEC_TBL))]
5903   "TARGET_SIMD"
5904   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5905   [(set_attr "type" "neon_tbl4")]
5906 )
5907
5908 (define_insn "aarch64_qtbx4<mode>"
5909   [(set (match_operand:VB 0 "register_operand" "=w")
5910         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5911                       (match_operand:XI 2 "register_operand" "w")
5912                       (match_operand:VB 3 "register_operand" "w")]
5913                       UNSPEC_TBX))]
5914   "TARGET_SIMD"
5915   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5916   [(set_attr "type" "neon_tbl4")]
5917 )
5918
5919 (define_insn_and_split "aarch64_combinev16qi"
5920   [(set (match_operand:OI 0 "register_operand" "=w")
5921         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5922                     (match_operand:V16QI 2 "register_operand" "w")]
5923                    UNSPEC_CONCAT))]
5924   "TARGET_SIMD"
5925   "#"
5926   "&& reload_completed"
5927   [(const_int 0)]
5928 {
5929   aarch64_split_combinev16qi (operands);
5930   DONE;
5931 }
5932 [(set_attr "type" "multiple")]
5933 )
5934
5935 ;; This instruction's pattern is generated directly by
5936 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5937 ;; need corresponding changes there.
5938 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
5939   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5940         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5941                           (match_operand:VALL_F16 2 "register_operand" "w")]
5942          PERMUTE))]
5943   "TARGET_SIMD"
5944   "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5945   [(set_attr "type" "neon_permute<q>")]
5946 )
5947
5948 ;; This instruction's pattern is generated directly by
5949 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5950 ;; need corresponding changes there.  Note that the immediate (third)
5951 ;; operand is a lane index not a byte index.
5952 (define_insn "aarch64_ext<mode>"
5953   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5954         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5955                           (match_operand:VALL_F16 2 "register_operand" "w")
5956                           (match_operand:SI 3 "immediate_operand" "i")]
5957          UNSPEC_EXT))]
5958   "TARGET_SIMD"
5959 {
5960   operands[3] = GEN_INT (INTVAL (operands[3])
5961       * GET_MODE_UNIT_SIZE (<MODE>mode));
5962   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5963 }
5964   [(set_attr "type" "neon_ext<q>")]
5965 )
5966
5967 ;; This instruction's pattern is generated directly by
5968 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5969 ;; need corresponding changes there.
5970 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5971   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5972         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5973                     REVERSE))]
5974   "TARGET_SIMD"
5975   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5976   [(set_attr "type" "neon_rev<q>")]
5977 )
5978
5979 (define_insn "aarch64_st2<mode>_dreg"
5980   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5981         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5982                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5983                    UNSPEC_ST2))]
5984   "TARGET_SIMD"
5985   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5986   [(set_attr "type" "neon_store2_2reg")]
5987 )
5988
5989 (define_insn "aarch64_st2<mode>_dreg"
5990   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5991         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5992                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5993                    UNSPEC_ST2))]
5994   "TARGET_SIMD"
5995   "st1\\t{%S1.1d - %T1.1d}, %0"
5996   [(set_attr "type" "neon_store1_2reg")]
5997 )
5998
5999 (define_insn "aarch64_st3<mode>_dreg"
6000   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6001         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6002                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6003                    UNSPEC_ST3))]
6004   "TARGET_SIMD"
6005   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6006   [(set_attr "type" "neon_store3_3reg")]
6007 )
6008
6009 (define_insn "aarch64_st3<mode>_dreg"
6010   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6011         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6012                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6013                    UNSPEC_ST3))]
6014   "TARGET_SIMD"
6015   "st1\\t{%S1.1d - %U1.1d}, %0"
6016   [(set_attr "type" "neon_store1_3reg")]
6017 )
6018
6019 (define_insn "aarch64_st4<mode>_dreg"
6020   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6021         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6022                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6023                    UNSPEC_ST4))]
6024   "TARGET_SIMD"
6025   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
6026   [(set_attr "type" "neon_store4_4reg")]
6027 )
6028
6029 (define_insn "aarch64_st4<mode>_dreg"
6030   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6031         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6032                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6033                    UNSPEC_ST4))]
6034   "TARGET_SIMD"
6035   "st1\\t{%S1.1d - %V1.1d}, %0"
6036   [(set_attr "type" "neon_store1_4reg")]
6037 )
6038
6039 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
6040  [(match_operand:DI 0 "register_operand")
6041   (match_operand:VSTRUCT 1 "register_operand")
6042   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6043   "TARGET_SIMD"
6044 {
6045   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6046   set_mem_size (mem, <VSTRUCT:nregs> * 8);
6047
6048   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
6049   DONE;
6050 })
6051
6052 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
6053  [(match_operand:DI 0 "register_operand")
6054   (match_operand:VSTRUCT 1 "register_operand")
6055   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6056   "TARGET_SIMD"
6057 {
6058   machine_mode mode = <VSTRUCT:MODE>mode;
6059   rtx mem = gen_rtx_MEM (mode, operands[0]);
6060
6061   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
6062   DONE;
6063 })
6064
6065 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6066  [(match_operand:DI 0 "register_operand")
6067   (match_operand:VSTRUCT 1 "register_operand")
6068   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
6069   (match_operand:SI 2 "immediate_operand")]
6070   "TARGET_SIMD"
6071 {
6072   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6073   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6074                      * <VSTRUCT:nregs>);
6075
6076   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6077                 mem, operands[1], operands[2]));
6078   DONE;
6079 })
6080
6081 (define_expand "aarch64_st1<VALL_F16:mode>"
6082  [(match_operand:DI 0 "register_operand")
6083   (match_operand:VALL_F16 1 "register_operand")]
6084   "TARGET_SIMD"
6085 {
6086   machine_mode mode = <VALL_F16:MODE>mode;
6087   rtx mem = gen_rtx_MEM (mode, operands[0]);
6088
6089   if (BYTES_BIG_ENDIAN)
6090     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
6091   else
6092     emit_move_insn (mem, operands[1]);
6093   DONE;
6094 })
6095
6096 ;; Expander for builtins to insert vector registers into large
6097 ;; opaque integer modes.
6098
6099 ;; Q-register list.  We don't need a D-reg inserter as we zero
6100 ;; extend them in arm_neon.h and insert the resulting Q-regs.
6101
6102 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
6103  [(match_operand:VSTRUCT 0 "register_operand")
6104   (match_operand:VSTRUCT 1 "register_operand")
6105   (match_operand:VQ 2 "register_operand")
6106   (match_operand:SI 3 "immediate_operand")]
6107   "TARGET_SIMD"
6108 {
6109   int part = INTVAL (operands[3]);
6110   int offset = part * 16;
6111
6112   emit_move_insn (operands[0], operands[1]);
6113   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
6114                   operands[2]);
6115   DONE;
6116 })
6117
6118 ;; Standard pattern name vec_init<mode><Vel>.
6119
6120 (define_expand "vec_init<mode><Vel>"
6121   [(match_operand:VALL_F16 0 "register_operand")
6122    (match_operand 1 "" "")]
6123   "TARGET_SIMD"
6124 {
6125   aarch64_expand_vector_init (operands[0], operands[1]);
6126   DONE;
6127 })
6128
6129 (define_expand "vec_init<mode><Vhalf>"
6130   [(match_operand:VQ_NO2E 0 "register_operand")
6131    (match_operand 1 "" "")]
6132   "TARGET_SIMD"
6133 {
6134   aarch64_expand_vector_init (operands[0], operands[1]);
6135   DONE;
6136 })
6137
6138 (define_insn "*aarch64_simd_ld1r<mode>"
6139   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6140         (vec_duplicate:VALL_F16
6141           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6142   "TARGET_SIMD"
6143   "ld1r\\t{%0.<Vtype>}, %1"
6144   [(set_attr "type" "neon_load1_all_lanes")]
6145 )
6146
6147 (define_insn "aarch64_simd_ld1<mode>_x2"
6148   [(set (match_operand:OI 0 "register_operand" "=w")
6149         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6150                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6151                    UNSPEC_LD1))]
6152   "TARGET_SIMD"
6153   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6154   [(set_attr "type" "neon_load1_2reg<q>")]
6155 )
6156
6157 (define_insn "aarch64_simd_ld1<mode>_x2"
6158   [(set (match_operand:OI 0 "register_operand" "=w")
6159         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6160                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6161                    UNSPEC_LD1))]
6162   "TARGET_SIMD"
6163   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6164   [(set_attr "type" "neon_load1_2reg<q>")]
6165 )
6166
6167
6168 (define_insn "@aarch64_frecpe<mode>"
6169   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6170         (unspec:VHSDF_HSDF
6171          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6172          UNSPEC_FRECPE))]
6173   "TARGET_SIMD"
6174   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6175   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6176 )
6177
6178 (define_insn "aarch64_frecpx<mode>"
6179   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6180         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6181          UNSPEC_FRECPX))]
6182   "TARGET_SIMD"
6183   "frecpx\t%<s>0, %<s>1"
6184   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6185 )
6186
6187 (define_insn "@aarch64_frecps<mode>"
6188   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6189         (unspec:VHSDF_HSDF
6190           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6191           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6192           UNSPEC_FRECPS))]
6193   "TARGET_SIMD"
6194   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6195   [(set_attr "type" "neon_fp_recps_<stype><q>")]
6196 )
6197
6198 (define_insn "aarch64_urecpe<mode>"
6199   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6200         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6201                 UNSPEC_URECPE))]
6202  "TARGET_SIMD"
6203  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6204   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6205
6206 ;; Standard pattern name vec_extract<mode><Vel>.
6207
6208 (define_expand "vec_extract<mode><Vel>"
6209   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6210    (match_operand:VALL_F16 1 "register_operand")
6211    (match_operand:SI 2 "immediate_operand")]
6212   "TARGET_SIMD"
6213 {
6214     emit_insn
6215       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6216     DONE;
6217 })
6218
6219 ;; Extract a 64-bit vector from one half of a 128-bit vector.
6220 (define_expand "vec_extract<mode><Vhalf>"
6221   [(match_operand:<VHALF> 0 "register_operand")
6222    (match_operand:VQMOV_NO2E 1 "register_operand")
6223    (match_operand 2 "immediate_operand")]
6224   "TARGET_SIMD"
6225 {
6226   int start = INTVAL (operands[2]);
6227   if (start != 0 && start != <nunits> / 2)
6228     FAIL;
6229   rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
6230   emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
6231   DONE;
6232 })
6233
6234 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
6235 (define_expand "vec_extractv2dfv1df"
6236   [(match_operand:V1DF 0 "register_operand")
6237    (match_operand:V2DF 1 "register_operand")
6238    (match_operand 2 "immediate_operand")]
6239   "TARGET_SIMD"
6240 {
6241   /* V1DF is rarely used by other patterns, so it should be better to hide
6242      it in a subreg destination of a normal DF op.  */
6243   rtx scalar0 = gen_lowpart (DFmode, operands[0]);
6244   emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
6245   DONE;
6246 })
6247
6248 ;; aes
6249
6250 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6251   [(set (match_operand:V16QI 0 "register_operand" "=w")
6252         (unspec:V16QI
6253                 [(xor:V16QI
6254                  (match_operand:V16QI 1 "register_operand" "%0")
6255                  (match_operand:V16QI 2 "register_operand" "w"))]
6256          CRYPTO_AES))]
6257   "TARGET_SIMD && TARGET_AES"
6258   "aes<aes_op>\\t%0.16b, %2.16b"
6259   [(set_attr "type" "crypto_aese")]
6260 )
6261
6262 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6263   [(set (match_operand:V16QI 0 "register_operand" "=w")
6264         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6265          CRYPTO_AESMC))]
6266   "TARGET_SIMD && TARGET_AES"
6267   "aes<aesmc_op>\\t%0.16b, %1.16b"
6268   [(set_attr "type" "crypto_aesmc")]
6269 )
6270
6271 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6272 ;; and enforce the register dependency without scheduling or register
6273 ;; allocation messing up the order or introducing moves inbetween.
6274 ;;  Mash the two together during combine.
6275
6276 (define_insn "*aarch64_crypto_aese_fused"
6277   [(set (match_operand:V16QI 0 "register_operand" "=w")
6278         (unspec:V16QI
6279           [(unspec:V16QI
6280            [(xor:V16QI
6281                 (match_operand:V16QI 1 "register_operand" "%0")
6282                 (match_operand:V16QI 2 "register_operand" "w"))]
6283              UNSPEC_AESE)]
6284         UNSPEC_AESMC))]
6285   "TARGET_SIMD && TARGET_AES
6286    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6287   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6288   [(set_attr "type" "crypto_aese")
6289    (set_attr "length" "8")]
6290 )
6291
6292 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6293 ;; and enforce the register dependency without scheduling or register
6294 ;; allocation messing up the order or introducing moves inbetween.
6295 ;;  Mash the two together during combine.
6296
6297 (define_insn "*aarch64_crypto_aesd_fused"
6298   [(set (match_operand:V16QI 0 "register_operand" "=w")
6299         (unspec:V16QI
6300           [(unspec:V16QI
6301                     [(xor:V16QI
6302                         (match_operand:V16QI 1 "register_operand" "%0")
6303                         (match_operand:V16QI 2 "register_operand" "w"))]
6304                 UNSPEC_AESD)]
6305           UNSPEC_AESIMC))]
6306   "TARGET_SIMD && TARGET_AES
6307    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6308   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6309   [(set_attr "type" "crypto_aese")
6310    (set_attr "length" "8")]
6311 )
6312
6313 ;; sha1
6314
6315 (define_insn "aarch64_crypto_sha1hsi"
6316   [(set (match_operand:SI 0 "register_operand" "=w")
6317         (unspec:SI [(match_operand:SI 1
6318                        "register_operand" "w")]
6319          UNSPEC_SHA1H))]
6320   "TARGET_SIMD && TARGET_SHA2"
6321   "sha1h\\t%s0, %s1"
6322   [(set_attr "type" "crypto_sha1_fast")]
6323 )
6324
6325 (define_insn "aarch64_crypto_sha1hv4si"
6326   [(set (match_operand:SI 0 "register_operand" "=w")
6327         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6328                      (parallel [(const_int 0)]))]
6329          UNSPEC_SHA1H))]
6330   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6331   "sha1h\\t%s0, %s1"
6332   [(set_attr "type" "crypto_sha1_fast")]
6333 )
6334
6335 (define_insn "aarch64_be_crypto_sha1hv4si"
6336   [(set (match_operand:SI 0 "register_operand" "=w")
6337         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6338                      (parallel [(const_int 3)]))]
6339          UNSPEC_SHA1H))]
6340   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6341   "sha1h\\t%s0, %s1"
6342   [(set_attr "type" "crypto_sha1_fast")]
6343 )
6344
6345 (define_insn "aarch64_crypto_sha1su1v4si"
6346   [(set (match_operand:V4SI 0 "register_operand" "=w")
6347         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6348                       (match_operand:V4SI 2 "register_operand" "w")]
6349          UNSPEC_SHA1SU1))]
6350   "TARGET_SIMD && TARGET_SHA2"
6351   "sha1su1\\t%0.4s, %2.4s"
6352   [(set_attr "type" "crypto_sha1_fast")]
6353 )
6354
6355 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6356   [(set (match_operand:V4SI 0 "register_operand" "=w")
6357         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6358                       (match_operand:SI 2 "register_operand" "w")
6359                       (match_operand:V4SI 3 "register_operand" "w")]
6360          CRYPTO_SHA1))]
6361   "TARGET_SIMD && TARGET_SHA2"
6362   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6363   [(set_attr "type" "crypto_sha1_slow")]
6364 )
6365
6366 (define_insn "aarch64_crypto_sha1su0v4si"
6367   [(set (match_operand:V4SI 0 "register_operand" "=w")
6368         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6369                       (match_operand:V4SI 2 "register_operand" "w")
6370                       (match_operand:V4SI 3 "register_operand" "w")]
6371          UNSPEC_SHA1SU0))]
6372   "TARGET_SIMD && TARGET_SHA2"
6373   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6374   [(set_attr "type" "crypto_sha1_xor")]
6375 )
6376
6377 ;; sha256
6378
6379 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6380   [(set (match_operand:V4SI 0 "register_operand" "=w")
6381         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6382                       (match_operand:V4SI 2 "register_operand" "w")
6383                       (match_operand:V4SI 3 "register_operand" "w")]
6384          CRYPTO_SHA256))]
6385   "TARGET_SIMD && TARGET_SHA2"
6386   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6387   [(set_attr "type" "crypto_sha256_slow")]
6388 )
6389
6390 (define_insn "aarch64_crypto_sha256su0v4si"
6391   [(set (match_operand:V4SI 0 "register_operand" "=w")
6392         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6393                       (match_operand:V4SI 2 "register_operand" "w")]
6394          UNSPEC_SHA256SU0))]
6395   "TARGET_SIMD && TARGET_SHA2"
6396   "sha256su0\\t%0.4s, %2.4s"
6397   [(set_attr "type" "crypto_sha256_fast")]
6398 )
6399
6400 (define_insn "aarch64_crypto_sha256su1v4si"
6401   [(set (match_operand:V4SI 0 "register_operand" "=w")
6402         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6403                       (match_operand:V4SI 2 "register_operand" "w")
6404                       (match_operand:V4SI 3 "register_operand" "w")]
6405          UNSPEC_SHA256SU1))]
6406   "TARGET_SIMD && TARGET_SHA2"
6407   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6408   [(set_attr "type" "crypto_sha256_slow")]
6409 )
6410
6411 ;; sha512
6412
6413 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6414   [(set (match_operand:V2DI 0 "register_operand" "=w")
6415         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6416                       (match_operand:V2DI 2 "register_operand" "w")
6417                       (match_operand:V2DI 3 "register_operand" "w")]
6418          CRYPTO_SHA512))]
6419   "TARGET_SIMD && TARGET_SHA3"
6420   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6421   [(set_attr "type" "crypto_sha512")]
6422 )
6423
6424 (define_insn "aarch64_crypto_sha512su0qv2di"
6425   [(set (match_operand:V2DI 0 "register_operand" "=w")
6426         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6427                       (match_operand:V2DI 2 "register_operand" "w")]
6428          UNSPEC_SHA512SU0))]
6429   "TARGET_SIMD && TARGET_SHA3"
6430   "sha512su0\\t%0.2d, %2.2d"
6431   [(set_attr "type" "crypto_sha512")]
6432 )
6433
6434 (define_insn "aarch64_crypto_sha512su1qv2di"
6435   [(set (match_operand:V2DI 0 "register_operand" "=w")
6436         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6437                       (match_operand:V2DI 2 "register_operand" "w")
6438                       (match_operand:V2DI 3 "register_operand" "w")]
6439          UNSPEC_SHA512SU1))]
6440   "TARGET_SIMD && TARGET_SHA3"
6441   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6442   [(set_attr "type" "crypto_sha512")]
6443 )
6444
6445 ;; sha3
6446
6447 (define_insn "eor3q<mode>4"
6448   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6449         (xor:VQ_I
6450          (xor:VQ_I
6451           (match_operand:VQ_I 2 "register_operand" "w")
6452           (match_operand:VQ_I 3 "register_operand" "w"))
6453          (match_operand:VQ_I 1 "register_operand" "w")))]
6454   "TARGET_SIMD && TARGET_SHA3"
6455   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6456   [(set_attr "type" "crypto_sha3")]
6457 )
6458
6459 (define_insn "aarch64_rax1qv2di"
6460   [(set (match_operand:V2DI 0 "register_operand" "=w")
6461         (xor:V2DI
6462          (rotate:V2DI
6463           (match_operand:V2DI 2 "register_operand" "w")
6464           (const_int 1))
6465          (match_operand:V2DI 1 "register_operand" "w")))]
6466   "TARGET_SIMD && TARGET_SHA3"
6467   "rax1\\t%0.2d, %1.2d, %2.2d"
6468   [(set_attr "type" "crypto_sha3")]
6469 )
6470
6471 (define_insn "aarch64_xarqv2di"
6472   [(set (match_operand:V2DI 0 "register_operand" "=w")
6473         (rotatert:V2DI
6474          (xor:V2DI
6475           (match_operand:V2DI 1 "register_operand" "%w")
6476           (match_operand:V2DI 2 "register_operand" "w"))
6477          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6478   "TARGET_SIMD && TARGET_SHA3"
6479   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6480   [(set_attr "type" "crypto_sha3")]
6481 )
6482
6483 (define_insn "bcaxq<mode>4"
6484   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6485         (xor:VQ_I
6486          (and:VQ_I
6487           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6488           (match_operand:VQ_I 2 "register_operand" "w"))
6489          (match_operand:VQ_I 1 "register_operand" "w")))]
6490   "TARGET_SIMD && TARGET_SHA3"
6491   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6492   [(set_attr "type" "crypto_sha3")]
6493 )
6494
6495 ;; SM3
6496
6497 (define_insn "aarch64_sm3ss1qv4si"
6498   [(set (match_operand:V4SI 0 "register_operand" "=w")
6499         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6500                       (match_operand:V4SI 2 "register_operand" "w")
6501                       (match_operand:V4SI 3 "register_operand" "w")]
6502          UNSPEC_SM3SS1))]
6503   "TARGET_SIMD && TARGET_SM4"
6504   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6505   [(set_attr "type" "crypto_sm3")]
6506 )
6507
6508
6509 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6510   [(set (match_operand:V4SI 0 "register_operand" "=w")
6511         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6512                       (match_operand:V4SI 2 "register_operand" "w")
6513                       (match_operand:V4SI 3 "register_operand" "w")
6514                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6515          CRYPTO_SM3TT))]
6516   "TARGET_SIMD && TARGET_SM4"
6517   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6518   [(set_attr "type" "crypto_sm3")]
6519 )
6520
6521 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6522   [(set (match_operand:V4SI 0 "register_operand" "=w")
6523         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6524                       (match_operand:V4SI 2 "register_operand" "w")
6525                       (match_operand:V4SI 3 "register_operand" "w")]
6526          CRYPTO_SM3PART))]
6527   "TARGET_SIMD && TARGET_SM4"
6528   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6529   [(set_attr "type" "crypto_sm3")]
6530 )
6531
6532 ;; SM4
6533
6534 (define_insn "aarch64_sm4eqv4si"
6535   [(set (match_operand:V4SI 0 "register_operand" "=w")
6536         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6537                       (match_operand:V4SI 2 "register_operand" "w")]
6538          UNSPEC_SM4E))]
6539   "TARGET_SIMD && TARGET_SM4"
6540   "sm4e\\t%0.4s, %2.4s"
6541   [(set_attr "type" "crypto_sm4")]
6542 )
6543
6544 (define_insn "aarch64_sm4ekeyqv4si"
6545   [(set (match_operand:V4SI 0 "register_operand" "=w")
6546         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6547                       (match_operand:V4SI 2 "register_operand" "w")]
6548          UNSPEC_SM4EKEY))]
6549   "TARGET_SIMD && TARGET_SM4"
6550   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6551   [(set_attr "type" "crypto_sm4")]
6552 )
6553
6554 ;; fp16fml
6555
6556 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6557   [(set (match_operand:VDQSF 0 "register_operand")
6558         (unspec:VDQSF
6559          [(match_operand:VDQSF 1 "register_operand")
6560           (match_operand:<VFMLA_W> 2 "register_operand")
6561           (match_operand:<VFMLA_W> 3 "register_operand")]
6562          VFMLA16_LOW))]
6563   "TARGET_F16FML"
6564 {
6565   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6566                                             <nunits> * 2, false);
6567   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6568                                             <nunits> * 2, false);
6569
6570   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6571                                                                 operands[1],
6572                                                                 operands[2],
6573                                                                 operands[3],
6574                                                                 p1, p2));
6575   DONE;
6576
6577 })
6578
6579 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6580   [(set (match_operand:VDQSF 0 "register_operand")
6581         (unspec:VDQSF
6582          [(match_operand:VDQSF 1 "register_operand")
6583           (match_operand:<VFMLA_W> 2 "register_operand")
6584           (match_operand:<VFMLA_W> 3 "register_operand")]
6585          VFMLA16_HIGH))]
6586   "TARGET_F16FML"
6587 {
6588   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6589   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6590
6591   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6592                                                                  operands[1],
6593                                                                  operands[2],
6594                                                                  operands[3],
6595                                                                  p1, p2));
6596   DONE;
6597 })
6598
6599 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6600   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6601         (fma:VDQSF
6602          (float_extend:VDQSF
6603           (vec_select:<VFMLA_SEL_W>
6604            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6605            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6606          (float_extend:VDQSF
6607           (vec_select:<VFMLA_SEL_W>
6608            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6609            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6610          (match_operand:VDQSF 1 "register_operand" "0")))]
6611   "TARGET_F16FML"
6612   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6613   [(set_attr "type" "neon_fp_mul_s")]
6614 )
6615
6616 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6617   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6618         (fma:VDQSF
6619          (float_extend:VDQSF
6620           (neg:<VFMLA_SEL_W>
6621            (vec_select:<VFMLA_SEL_W>
6622             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6623             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6624          (float_extend:VDQSF
6625           (vec_select:<VFMLA_SEL_W>
6626            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6627            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6628          (match_operand:VDQSF 1 "register_operand" "0")))]
6629   "TARGET_F16FML"
6630   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6631   [(set_attr "type" "neon_fp_mul_s")]
6632 )
6633
6634 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6635   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6636         (fma:VDQSF
6637          (float_extend:VDQSF
6638           (vec_select:<VFMLA_SEL_W>
6639            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6640            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6641          (float_extend:VDQSF
6642           (vec_select:<VFMLA_SEL_W>
6643            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6644            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6645          (match_operand:VDQSF 1 "register_operand" "0")))]
6646   "TARGET_F16FML"
6647   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6648   [(set_attr "type" "neon_fp_mul_s")]
6649 )
6650
6651 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6652   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6653         (fma:VDQSF
6654          (float_extend:VDQSF
6655           (neg:<VFMLA_SEL_W>
6656            (vec_select:<VFMLA_SEL_W>
6657             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6658             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6659          (float_extend:VDQSF
6660           (vec_select:<VFMLA_SEL_W>
6661            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6662            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6663          (match_operand:VDQSF 1 "register_operand" "0")))]
6664   "TARGET_F16FML"
6665   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6666   [(set_attr "type" "neon_fp_mul_s")]
6667 )
6668
6669 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6670   [(set (match_operand:V2SF 0 "register_operand")
6671         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6672                            (match_operand:V4HF 2 "register_operand")
6673                            (match_operand:V4HF 3 "register_operand")
6674                            (match_operand:SI 4 "aarch64_imm2")]
6675          VFMLA16_LOW))]
6676   "TARGET_F16FML"
6677 {
6678     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6679     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6680
6681     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6682                                                             operands[1],
6683                                                             operands[2],
6684                                                             operands[3],
6685                                                             p1, lane));
6686     DONE;
6687 }
6688 )
6689
6690 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6691   [(set (match_operand:V2SF 0 "register_operand")
6692         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6693                            (match_operand:V4HF 2 "register_operand")
6694                            (match_operand:V4HF 3 "register_operand")
6695                            (match_operand:SI 4 "aarch64_imm2")]
6696          VFMLA16_HIGH))]
6697   "TARGET_F16FML"
6698 {
6699     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6700     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6701
6702     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6703                                                              operands[1],
6704                                                              operands[2],
6705                                                              operands[3],
6706                                                              p1, lane));
6707     DONE;
6708 })
6709
6710 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6711   [(set (match_operand:V2SF 0 "register_operand" "=w")
6712         (fma:V2SF
6713          (float_extend:V2SF
6714            (vec_select:V2HF
6715             (match_operand:V4HF 2 "register_operand" "w")
6716             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6717          (float_extend:V2SF
6718            (vec_duplicate:V2HF
6719             (vec_select:HF
6720              (match_operand:V4HF 3 "register_operand" "x")
6721              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6722          (match_operand:V2SF 1 "register_operand" "0")))]
6723   "TARGET_F16FML"
6724   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6725   [(set_attr "type" "neon_fp_mul_s")]
6726 )
6727
6728 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6729   [(set (match_operand:V2SF 0 "register_operand" "=w")
6730         (fma:V2SF
6731          (float_extend:V2SF
6732           (neg:V2HF
6733            (vec_select:V2HF
6734             (match_operand:V4HF 2 "register_operand" "w")
6735             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6736          (float_extend:V2SF
6737           (vec_duplicate:V2HF
6738            (vec_select:HF
6739             (match_operand:V4HF 3 "register_operand" "x")
6740             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6741          (match_operand:V2SF 1 "register_operand" "0")))]
6742   "TARGET_F16FML"
6743   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6744   [(set_attr "type" "neon_fp_mul_s")]
6745 )
6746
6747 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6748   [(set (match_operand:V2SF 0 "register_operand" "=w")
6749         (fma:V2SF
6750          (float_extend:V2SF
6751            (vec_select:V2HF
6752             (match_operand:V4HF 2 "register_operand" "w")
6753             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6754          (float_extend:V2SF
6755            (vec_duplicate:V2HF
6756             (vec_select:HF
6757              (match_operand:V4HF 3 "register_operand" "x")
6758              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6759          (match_operand:V2SF 1 "register_operand" "0")))]
6760   "TARGET_F16FML"
6761   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6762   [(set_attr "type" "neon_fp_mul_s")]
6763 )
6764
6765 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6766   [(set (match_operand:V2SF 0 "register_operand" "=w")
6767         (fma:V2SF
6768          (float_extend:V2SF
6769            (neg:V2HF
6770             (vec_select:V2HF
6771              (match_operand:V4HF 2 "register_operand" "w")
6772              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6773          (float_extend:V2SF
6774            (vec_duplicate:V2HF
6775             (vec_select:HF
6776              (match_operand:V4HF 3 "register_operand" "x")
6777              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778          (match_operand:V2SF 1 "register_operand" "0")))]
6779   "TARGET_F16FML"
6780   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6781   [(set_attr "type" "neon_fp_mul_s")]
6782 )
6783
6784 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6785   [(set (match_operand:V4SF 0 "register_operand")
6786         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6787                            (match_operand:V8HF 2 "register_operand")
6788                            (match_operand:V8HF 3 "register_operand")
6789                            (match_operand:SI 4 "aarch64_lane_imm3")]
6790          VFMLA16_LOW))]
6791   "TARGET_F16FML"
6792 {
6793     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6794     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6795
6796     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6797                                                               operands[1],
6798                                                               operands[2],
6799                                                               operands[3],
6800                                                               p1, lane));
6801     DONE;
6802 })
6803
6804 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6805   [(set (match_operand:V4SF 0 "register_operand")
6806         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6807                            (match_operand:V8HF 2 "register_operand")
6808                            (match_operand:V8HF 3 "register_operand")
6809                            (match_operand:SI 4 "aarch64_lane_imm3")]
6810          VFMLA16_HIGH))]
6811   "TARGET_F16FML"
6812 {
6813     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6814     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6815
6816     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6817                                                                operands[1],
6818                                                                operands[2],
6819                                                                operands[3],
6820                                                                p1, lane));
6821     DONE;
6822 })
6823
6824 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6825   [(set (match_operand:V4SF 0 "register_operand" "=w")
6826         (fma:V4SF
6827          (float_extend:V4SF
6828           (vec_select:V4HF
6829             (match_operand:V8HF 2 "register_operand" "w")
6830             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6831          (float_extend:V4SF
6832           (vec_duplicate:V4HF
6833            (vec_select:HF
6834             (match_operand:V8HF 3 "register_operand" "x")
6835             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6836          (match_operand:V4SF 1 "register_operand" "0")))]
6837   "TARGET_F16FML"
6838   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6839   [(set_attr "type" "neon_fp_mul_s")]
6840 )
6841
6842 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6843   [(set (match_operand:V4SF 0 "register_operand" "=w")
6844         (fma:V4SF
6845           (float_extend:V4SF
6846            (neg:V4HF
6847             (vec_select:V4HF
6848              (match_operand:V8HF 2 "register_operand" "w")
6849              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6850          (float_extend:V4SF
6851           (vec_duplicate:V4HF
6852            (vec_select:HF
6853             (match_operand:V8HF 3 "register_operand" "x")
6854             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6855          (match_operand:V4SF 1 "register_operand" "0")))]
6856   "TARGET_F16FML"
6857   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6858   [(set_attr "type" "neon_fp_mul_s")]
6859 )
6860
6861 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6862   [(set (match_operand:V4SF 0 "register_operand" "=w")
6863         (fma:V4SF
6864          (float_extend:V4SF
6865           (vec_select:V4HF
6866             (match_operand:V8HF 2 "register_operand" "w")
6867             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6868          (float_extend:V4SF
6869           (vec_duplicate:V4HF
6870            (vec_select:HF
6871             (match_operand:V8HF 3 "register_operand" "x")
6872             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6873          (match_operand:V4SF 1 "register_operand" "0")))]
6874   "TARGET_F16FML"
6875   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6876   [(set_attr "type" "neon_fp_mul_s")]
6877 )
6878
6879 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6880   [(set (match_operand:V4SF 0 "register_operand" "=w")
6881         (fma:V4SF
6882          (float_extend:V4SF
6883           (neg:V4HF
6884            (vec_select:V4HF
6885             (match_operand:V8HF 2 "register_operand" "w")
6886             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6887          (float_extend:V4SF
6888           (vec_duplicate:V4HF
6889            (vec_select:HF
6890             (match_operand:V8HF 3 "register_operand" "x")
6891             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6892          (match_operand:V4SF 1 "register_operand" "0")))]
6893   "TARGET_F16FML"
6894   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6895   [(set_attr "type" "neon_fp_mul_s")]
6896 )
6897
6898 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6899   [(set (match_operand:V2SF 0 "register_operand")
6900         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6901                       (match_operand:V4HF 2 "register_operand")
6902                       (match_operand:V8HF 3 "register_operand")
6903                       (match_operand:SI 4 "aarch64_lane_imm3")]
6904          VFMLA16_LOW))]
6905   "TARGET_F16FML"
6906 {
6907     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6908     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6909
6910     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6911                                                              operands[1],
6912                                                              operands[2],
6913                                                              operands[3],
6914                                                              p1, lane));
6915     DONE;
6916
6917 })
6918
6919 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6920   [(set (match_operand:V2SF 0 "register_operand")
6921         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6922                       (match_operand:V4HF 2 "register_operand")
6923                       (match_operand:V8HF 3 "register_operand")
6924                       (match_operand:SI 4 "aarch64_lane_imm3")]
6925          VFMLA16_HIGH))]
6926   "TARGET_F16FML"
6927 {
6928     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6929     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6930
6931     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6932                                                               operands[1],
6933                                                               operands[2],
6934                                                               operands[3],
6935                                                               p1, lane));
6936     DONE;
6937
6938 })
6939
6940 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6941   [(set (match_operand:V2SF 0 "register_operand" "=w")
6942         (fma:V2SF
6943          (float_extend:V2SF
6944            (vec_select:V2HF
6945             (match_operand:V4HF 2 "register_operand" "w")
6946             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6947          (float_extend:V2SF
6948           (vec_duplicate:V2HF
6949            (vec_select:HF
6950             (match_operand:V8HF 3 "register_operand" "x")
6951             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6952          (match_operand:V2SF 1 "register_operand" "0")))]
6953   "TARGET_F16FML"
6954   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6955   [(set_attr "type" "neon_fp_mul_s")]
6956 )
6957
6958 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6959   [(set (match_operand:V2SF 0 "register_operand" "=w")
6960         (fma:V2SF
6961          (float_extend:V2SF
6962           (neg:V2HF
6963            (vec_select:V2HF
6964             (match_operand:V4HF 2 "register_operand" "w")
6965             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6966          (float_extend:V2SF
6967           (vec_duplicate:V2HF
6968            (vec_select:HF
6969             (match_operand:V8HF 3 "register_operand" "x")
6970             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6971          (match_operand:V2SF 1 "register_operand" "0")))]
6972   "TARGET_F16FML"
6973   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6974   [(set_attr "type" "neon_fp_mul_s")]
6975 )
6976
6977 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6978   [(set (match_operand:V2SF 0 "register_operand" "=w")
6979         (fma:V2SF
6980          (float_extend:V2SF
6981            (vec_select:V2HF
6982             (match_operand:V4HF 2 "register_operand" "w")
6983             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6984          (float_extend:V2SF
6985           (vec_duplicate:V2HF
6986            (vec_select:HF
6987             (match_operand:V8HF 3 "register_operand" "x")
6988             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6989          (match_operand:V2SF 1 "register_operand" "0")))]
6990   "TARGET_F16FML"
6991   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6992   [(set_attr "type" "neon_fp_mul_s")]
6993 )
6994
6995 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6996   [(set (match_operand:V2SF 0 "register_operand" "=w")
6997         (fma:V2SF
6998          (float_extend:V2SF
6999           (neg:V2HF
7000            (vec_select:V2HF
7001             (match_operand:V4HF 2 "register_operand" "w")
7002             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
7003          (float_extend:V2SF
7004           (vec_duplicate:V2HF
7005            (vec_select:HF
7006             (match_operand:V8HF 3 "register_operand" "x")
7007             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7008          (match_operand:V2SF 1 "register_operand" "0")))]
7009   "TARGET_F16FML"
7010   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
7011   [(set_attr "type" "neon_fp_mul_s")]
7012 )
7013
7014 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
7015   [(set (match_operand:V4SF 0 "register_operand")
7016         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7017                       (match_operand:V8HF 2 "register_operand")
7018                       (match_operand:V4HF 3 "register_operand")
7019                       (match_operand:SI 4 "aarch64_imm2")]
7020          VFMLA16_LOW))]
7021   "TARGET_F16FML"
7022 {
7023     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
7024     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7025
7026     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
7027                                                              operands[1],
7028                                                              operands[2],
7029                                                              operands[3],
7030                                                              p1, lane));
7031     DONE;
7032 })
7033
7034 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
7035   [(set (match_operand:V4SF 0 "register_operand")
7036         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7037                       (match_operand:V8HF 2 "register_operand")
7038                       (match_operand:V4HF 3 "register_operand")
7039                       (match_operand:SI 4 "aarch64_imm2")]
7040          VFMLA16_HIGH))]
7041   "TARGET_F16FML"
7042 {
7043     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
7044     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7045
7046     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
7047                                                               operands[1],
7048                                                               operands[2],
7049                                                               operands[3],
7050                                                               p1, lane));
7051     DONE;
7052 })
7053
7054 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
7055   [(set (match_operand:V4SF 0 "register_operand" "=w")
7056         (fma:V4SF
7057          (float_extend:V4SF
7058           (vec_select:V4HF
7059            (match_operand:V8HF 2 "register_operand" "w")
7060            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
7061          (float_extend:V4SF
7062           (vec_duplicate:V4HF
7063            (vec_select:HF
7064             (match_operand:V4HF 3 "register_operand" "x")
7065             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7066          (match_operand:V4SF 1 "register_operand" "0")))]
7067   "TARGET_F16FML"
7068   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
7069   [(set_attr "type" "neon_fp_mul_s")]
7070 )
7071
7072 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
7073   [(set (match_operand:V4SF 0 "register_operand" "=w")
7074         (fma:V4SF
7075          (float_extend:V4SF
7076           (neg:V4HF
7077            (vec_select:V4HF
7078             (match_operand:V8HF 2 "register_operand" "w")
7079             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
7080          (float_extend:V4SF
7081           (vec_duplicate:V4HF
7082            (vec_select:HF
7083             (match_operand:V4HF 3 "register_operand" "x")
7084             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7085          (match_operand:V4SF 1 "register_operand" "0")))]
7086   "TARGET_F16FML"
7087   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
7088   [(set_attr "type" "neon_fp_mul_s")]
7089 )
7090
7091 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
7092   [(set (match_operand:V4SF 0 "register_operand" "=w")
7093         (fma:V4SF
7094          (float_extend:V4SF
7095           (vec_select:V4HF
7096            (match_operand:V8HF 2 "register_operand" "w")
7097            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
7098          (float_extend:V4SF
7099           (vec_duplicate:V4HF
7100            (vec_select:HF
7101             (match_operand:V4HF 3 "register_operand" "x")
7102             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7103          (match_operand:V4SF 1 "register_operand" "0")))]
7104   "TARGET_F16FML"
7105   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
7106   [(set_attr "type" "neon_fp_mul_s")]
7107 )
7108
7109 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
7110   [(set (match_operand:V4SF 0 "register_operand" "=w")
7111         (fma:V4SF
7112          (float_extend:V4SF
7113           (neg:V4HF
7114            (vec_select:V4HF
7115             (match_operand:V8HF 2 "register_operand" "w")
7116             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
7117          (float_extend:V4SF
7118           (vec_duplicate:V4HF
7119            (vec_select:HF
7120             (match_operand:V4HF 3 "register_operand" "x")
7121             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7122          (match_operand:V4SF 1 "register_operand" "0")))]
7123   "TARGET_F16FML"
7124   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
7125   [(set_attr "type" "neon_fp_mul_s")]
7126 )
7127
7128 ;; pmull
7129
7130 (define_insn "aarch64_crypto_pmulldi"
7131   [(set (match_operand:TI 0 "register_operand" "=w")
7132         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
7133                      (match_operand:DI 2 "register_operand" "w")]
7134                     UNSPEC_PMULL))]
7135  "TARGET_SIMD && TARGET_AES"
7136  "pmull\\t%0.1q, %1.1d, %2.1d"
7137   [(set_attr "type" "crypto_pmull")]
7138 )
7139
7140 (define_insn "aarch64_crypto_pmullv2di"
7141  [(set (match_operand:TI 0 "register_operand" "=w")
7142        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
7143                    (match_operand:V2DI 2 "register_operand" "w")]
7144                   UNSPEC_PMULL2))]
7145   "TARGET_SIMD && TARGET_AES"
7146   "pmull2\\t%0.1q, %1.2d, %2.2d"
7147   [(set_attr "type" "crypto_pmull")]
7148 )
7149
7150 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
7151 (define_insn "<optab><Vnarrowq><mode>2"
7152   [(set (match_operand:VQN 0 "register_operand" "=w")
7153         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7154   "TARGET_SIMD"
7155   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
7156   [(set_attr "type" "neon_shift_imm_long")]
7157 )
7158
7159 ;; Truncate a 128-bit integer vector to a 64-bit vector.
7160 (define_insn "trunc<mode><Vnarrowq>2"
7161   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7162         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7163   "TARGET_SIMD"
7164   "xtn\t%0.<Vntype>, %1.<Vtype>"
7165   [(set_attr "type" "neon_shift_imm_narrow_q")]
7166 )
7167
7168 (define_insn "aarch64_bfdot<mode>"
7169   [(set (match_operand:VDQSF 0 "register_operand" "=w")
7170         (plus:VDQSF
7171           (unspec:VDQSF
7172            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
7173             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
7174             UNSPEC_BFDOT)
7175           (match_operand:VDQSF 1 "register_operand" "0")))]
7176   "TARGET_BF16_SIMD"
7177   "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
7178   [(set_attr "type" "neon_dot<q>")]
7179 )
7180
7181 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
7182   [(set (match_operand:VDQSF 0 "register_operand" "=w")
7183         (plus:VDQSF
7184           (unspec:VDQSF
7185            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
7186             (match_operand:VBF 3 "register_operand" "w")
7187             (match_operand:SI 4 "const_int_operand" "n")]
7188             UNSPEC_BFDOT)
7189           (match_operand:VDQSF 1 "register_operand" "0")))]
7190   "TARGET_BF16_SIMD"
7191 {
7192   int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
7193   int lane = INTVAL (operands[4]);
7194   operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
7195   return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
7196 }
7197   [(set_attr "type" "neon_dot<VDQSF:q>")]
7198 )
7199
7200 ;; bfmmla
7201 (define_insn "aarch64_bfmmlaqv4sf"
7202   [(set (match_operand:V4SF 0 "register_operand" "=w")
7203         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
7204                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7205                                  (match_operand:V8BF 3 "register_operand" "w")]
7206                     UNSPEC_BFMMLA)))]
7207   "TARGET_BF16_SIMD"
7208   "bfmmla\\t%0.4s, %2.8h, %3.8h"
7209   [(set_attr "type" "neon_fp_mla_s_q")]
7210 )
7211
7212 ;; bfmlal<bt>
7213 (define_insn "aarch64_bfmlal<bt>v4sf"
7214   [(set (match_operand:V4SF 0 "register_operand" "=w")
7215         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7216                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7217                                   (match_operand:V8BF 3 "register_operand" "w")]
7218                      BF_MLA)))]
7219   "TARGET_BF16_SIMD"
7220   "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
7221   [(set_attr "type" "neon_fp_mla_s_q")]
7222 )
7223
7224 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
7225   [(set (match_operand:V4SF 0 "register_operand" "=w")
7226         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7227                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7228                                   (match_operand:VBF 3 "register_operand" "w")
7229                                   (match_operand:SI 4 "const_int_operand" "n")]
7230                      BF_MLA)))]
7231   "TARGET_BF16_SIMD"
7232 {
7233   operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
7234   return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
7235 }
7236   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
7237 )
7238
7239 ;; 8-bit integer matrix multiply-accumulate
7240 (define_insn "aarch64_simd_<sur>mmlav16qi"
7241   [(set (match_operand:V4SI 0 "register_operand" "=w")
7242         (plus:V4SI
7243          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
7244                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
7245          (match_operand:V4SI 1 "register_operand" "0")))]
7246   "TARGET_I8MM"
7247   "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
7248   [(set_attr "type" "neon_mla_s_q")]
7249 )
7250
7251 ;; bfcvtn
7252 (define_insn "aarch64_bfcvtn<q><mode>"
7253   [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
7254         (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
7255                             UNSPEC_BFCVTN))]
7256   "TARGET_BF16_SIMD"
7257   "bfcvtn\\t%0.4h, %1.4s"
7258   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
7259 )
7260
7261 (define_insn "aarch64_bfcvtn2v8bf"
7262   [(set (match_operand:V8BF 0 "register_operand" "=w")
7263         (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
7264                       (match_operand:V4SF 2 "register_operand" "w")]
7265                       UNSPEC_BFCVTN2))]
7266   "TARGET_BF16_SIMD"
7267   "bfcvtn2\\t%0.8h, %2.4s"
7268   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
7269 )
7270
7271 (define_insn "aarch64_bfcvtbf"
7272   [(set (match_operand:BF 0 "register_operand" "=w")
7273         (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
7274                     UNSPEC_BFCVT))]
7275   "TARGET_BF16_FP"
7276   "bfcvt\\t%h0, %s1"
7277   [(set_attr "type" "f_cvt")]
7278 )