;; Commit: [AArch64] Fix aarch64_ira_change_pseudo_allocno_class
;; File: gcc/config/aarch64/aarch64-simd.md (official-gcc.git)
;; Blob: 962386981afb94ba4039fd19dc298e122101dbeb
;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; General vector move expander.  Legitimize memory destinations so the
;; resulting store can always be emitted directly.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
;; Misaligned vector move: AArch64 AdvSIMD loads/stores tolerate
;; misalignment, so this is just a normal move after legitimization.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
;; Duplicate a scalar into every lane of an integer vector, either from a
;; SIMD register lane or from a general-purpose register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

;; Floating-point variant: the source is always a SIMD register lane.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
;; Duplicate one selected lane of a vector into every lane of the result.
;; The lane number is converted to the architectural (endian-corrected) lane.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As above, but the source vector has the opposite width (64 <-> 128 bit),
;; so the lane index is interpreted in the swapped-width mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
;; 64-bit vector move, covering load, store (including storing zero via
;; xzr), SIMD-to-SIMD, SIMD<->GP transfers and immediate moves.
(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
;; 128-bit vector move.  GP-register alternatives (4-6) emit "#" and are
;; split after reload into two DImode moves (see the define_splits below).
(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, Umq,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
;; Load a pair of adjacent 64-bit vector registers with a single LDP.
;; The condition checks that the second address really is the first
;; plus the size of the first mode.
(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
	(match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

;; Store a pair of adjacent 64-bit vector registers with a single STP.
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
	(match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
;; After reload, split a 128-bit move between two general-register pairs
;; into two DImode register-to-register moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; After reload, split a 128-bit move between an FP register and a
;; general-register pair (either direction) into lane transfers.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
;; Split a 128-bit SIMD <-> general-register move into two half-width
;; transfers, choosing the direction from the register class of SRC.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
	(match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
;; Move the low 64 bits of a 128-bit vector into a general register.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

;; Move the high 64 bits of a 128-bit vector into a general register.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
;; Bitwise OR-NOT: op0 = op2 | ~op1 (note the operand order in the output).
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Bitwise AND-NOT (bit clear): op0 = op2 & ~op1.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

;; Vector integer subtraction.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

;; Vector integer multiplication (no 64-bit element MUL on AdvSIMD,
;; hence the VDQ_BHSI iterator).
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
;; Byte-swap each element using the REV family of instructions.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
	(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Reverse the bits within each byte.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
;; Count trailing zeros: byte-swap, bit-reverse each byte (together a full
;; bit reversal of each element), then count leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
                                             <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
;; xorsign: copy the magnitude of op1 with the sign of op2, implemented as
;; op1 ^ (op2 & sign-bit-mask) on the integer view of the vectors.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  /* Mask selecting only the sign bit of each element.  */
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)
;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return result;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				    operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

;; As above, but indexing into a 128-bit (V16QI) lane operand.
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
;; copysign: take magnitude from op1 and sign from op2, implemented with a
;; bit-select (BSL) under a sign-bit mask.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
;; Multiply by a duplicated lane: combiner pattern matching MUL-by-element.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
;; Multiply by a scalar broadcast into all lanes; emitted as a
;; multiply-by-element of lane 0.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
;; Reciprocal square-root estimate.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton-Raphson step.
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Approximate reciprocal square root, expanded to an estimate plus
;; Newton-Raphson iterations.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
;; Scalar DF multiply by a selected lane of a V2DF vector.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
;; Vector integer negation.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

;; Vector integer absolute value.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
;; Signed absolute difference.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute difference on the high halves of the inputs.
(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")]
	ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute difference and accumulate.
(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")
			 (match_operand:<VDBLW> 3 "register_operand" "0")]
	ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Pairwise add-long and accumulate.
(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:<VDBLW> 2 "register_operand" "0")]
	ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;; The signed version just uses the signed variants of the above instructions.
(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
					       operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
					      operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
					      operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)
;; Signed absolute difference and accumulate.
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Floating-point absolute difference.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector exclusive OR.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise NOT.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Insert a scalar into one lane of a vector; the lane is encoded as a
;; one-hot merge mask in operand 2 and converted back to a lane index here.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
	return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
;; Copy one lane of a vector into a lane of another vector of the same mode.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; As above, but the source vector has the opposite width.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
;; Vector shifts by an immediate.  Operand 2 is a vector of identical
;; shift counts validated by the Dr/Dl constraints and the
;; aarch64_simd_[rl]shift_imm predicates (right shifts allow 1..width,
;; left shifts 0..width-1, per the AdvSIMD immediate encodings).

;; Logical shift right: USHR.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Arithmetic shift right: SSHR.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Shift left: SHL.
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
;; Vector shifts by per-element register counts (SSHL/USHL).  The shift
;; expanders below feed these with a broadcast scalar count; for right
;; shifts the count is negated first, matching the SSHL/USHL convention
;; that negative counts shift right.

;; Left shift expressed directly as RTL ashift, emitted as SSHL.
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; USHL with possibly-negative counts; kept as an unspec because no
;; single RTL shift code captures the bidirectional semantics.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")]
                   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; SSHL counterpart of the above (sign-filling right shifts).
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")]
                   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
;; Standard vector shift-left pattern.  In-range immediates use SHL
;; directly; anything else is broadcast to a count vector and handled
;; with SSHL via aarch64_simd_reg_sshl<mode>.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SHL immediates must be in [0, element width).  */
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          /* Out-of-range constant: fall through to the register path.  */
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Broadcast the scalar count to every lane, then shift with SSHL.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)
;; Standard vector logical shift-right pattern.  Immediates in
;; [1, element width] use USHR; otherwise the count is negated,
;; broadcast, and applied with USHL (negative counts shift right).
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* USHR allows shifts of 1..element width inclusive.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate and broadcast the count, then shift with USHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Standard vector arithmetic shift-right pattern.  Same strategy as
;; lshr<mode>3 but with SSHR for immediates and signed SSHL (negated
;; count) for register amounts.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SSHR allows shifts of 1..element width inclusive.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate and broadcast the count, then shift with SSHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Vector-by-vector shift left: maps directly onto SSHL.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
}
)
1034 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1035 ;; Negating individual lanes most certainly offsets the
1036 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the per-lane counts
;; and use SSHL (negative counts shift right).
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
}
)
1050 ;; DI vector shift
;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
;; Vector-by-vector logical shift right: negate the per-lane counts
;; and use USHL (negative counts shift right).
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
}
)
;; Scalar-DI logical shift right for the SIMD intrinsics.  A shift by 64
;; is defined to produce zero (the standard lshrdi3 pattern cannot
;; express it), so emit a zero move for that case.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1094 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift toward element zero by an immediate number of bits.
;; NOTE(review): on big-endian the element order within the 64-bit
;; register is reversed, which presumably is why SHL is used there —
;; confirm against the vec_shr optab documentation.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
;; Standard vec_set pattern: insert scalar operand 1 into lane
;; operands[2] of vector operand 0.  The lane index is converted to the
;; one-hot vec_merge mask expected by aarch64_simd_vec_set<mode>.
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                          GEN_INT (elem), operands[0]));
    DONE;
  }
)
;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3 (MLA).
;; The accumulator is tied to the output ("0" constraint).
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
                        (match_operand:VDQ_BHSI 2 "register_operand" "w")
                        (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
;; MLA with one multiplicand broadcast from lane operands[2] of
;; operand 1 (the by-element form of MLA).
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))
         (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    /* Endian-correct the lane number before printing it.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; By-element MLA where the lane source (operand 1) has the opposite
;; register width (<VSWAP_WIDTH>) to the result vector.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))
         (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    /* Lane number is interpreted in the swapped-width mode.  */
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; MLA where one multiplicand is a duplicated scalar register; uses the
;; by-element form with lane 0.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3 (MLS).
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                              (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
;; MLS with one multiplicand broadcast from lane operands[2] of
;; operand 1 (the by-element form of MLS).
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    /* Endian-correct the lane number before printing it.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; By-element MLS where the lane source (operand 1) has the opposite
;; register width (<VSWAP_WIDTH>) to the result vector.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    /* Lane number is interpreted in the swapped-width mode.  */
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; MLS where one multiplicand is a duplicated scalar register; uses the
;; by-element form with lane 0.
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1241 ;; Max/Min operations.
;; Max/Min operations.
;; Standard signed/unsigned max/min patterns (SMAX/SMIN/UMAX/UMIN).
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
;; V2DI has no max/min instruction, so synthesize it as a compare
;; followed by a vcond select of the original operands.
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Map the max/min code to the comparison that selects operand 1.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
}
)
1284 ;; Pairwise Integer Max/Min operations.
;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                        MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1295 ;; Pairwise FP Max/Min operations.
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                      (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1306 ;; vec_concat gives a new vector with the low elements from operand 1, and
1307 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1308 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1309 ;; What that means, is that the RTL descriptions of the below patterns
1310 ;; need to change depending on endianness.
1312 ;; Move to the low architectural bits of the register.
1313 ;; On little-endian this is { operand, zeroes }
1314 ;; On big-endian this is { zeroes, operand }
;; Little-endian move of operand 1 into the low 64 bits of a Q register,
;; zeroing the high half.  All three alternatives write only the low
;; 64 bits (DUP/FMOV to a D register implicitly zero-extend).
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; As above for the two-element (V2DI/V2DF) modes, where the zero half
;; is a plain (const_int 0) rather than a vec_duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
;; Big-endian counterparts: the zero half appears first in the
;; vec_concat because lane order is reversed, but the emitted
;; instructions are identical to the little-endian versions.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Two-element big-endian variant.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
;; Dispatch to the endian-specific move_lo_quad implementation.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)
1393 ;; Move operand1 to the high architectural bits of the register, keeping
1394 ;; the low architectural bits of operand2.
1395 ;; For little-endian this is { operand2, operand1 }
1396 ;; For big-endian this is { operand1, operand2 }
;; Little-endian: insert operand 1 into the high 64 bits of operand 0,
;; keeping the low half (INS into d[1]).  Operand 2 is the parallel
;; selecting the preserved low half.
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
;; Big-endian counterpart: the new half appears first in the
;; vec_concat, but the emitted INS is the same.
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
;; Dispatch to the endian-specific move_hi_quad insn, supplying the
;; lo-half selector parallel it needs.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                    operands[1], p));
  DONE;
}
)
1441 ;; Narrowing operations.
1443 ;; For doubles.
;; Narrowing operations.

;; For doubles: truncate each element of a Q-reg vector to half width (XTN).
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
;; Pack-truncate two D-reg vectors: assemble them into one Q-reg vector
;; (endian-swapping which operand lands in which half) and XTN it.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
}
)
1468 ;; For quads.
;; For quads.
;; Truncate two Q-reg vectors into one via XTN + XTN2; the order of the
;; two narrows is swapped on big-endian.  Output is earlyclobber since
;; it is written before operand 1/2 is fully consumed.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
         (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
1486 ;; Widening operations.
;; Widening operations.

;; Sign/zero-extend the low half of a Q-reg vector via a shift-left-long
;; by zero (SSHLL/USHLL #0).
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                               (match_operand:VQW 1 "register_operand" "w")
                               (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)
;; As above for the high half (SSHLL2/USHLL2 #0).
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                               (match_operand:VQW 1 "register_operand" "w")
                               (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)
;; Standard vec_unpack hi pattern: build the hi-half selector and defer
;; to the insn above.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)
;; Standard vec_unpack lo pattern: build the lo-half selector and defer
;; to the insn above.
(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)
1534 ;; Widening arithmetic.
;; Widening arithmetic.

;; Widening multiply-accumulate on the low halves of operands 2 and 4
;; (SMLAL/UMLAL); accumulator is the wide operand 1, tied to the output.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; High-half counterpart (SMLAL2/UMLAL2).
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply-subtract on the low halves of operands 2 and 4
;; (SMLSL/UMLSL); accumulator is the wide operand 1, tied to the output.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; High-half counterpart (SMLSL2/UMLSL2).
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply-accumulate of whole D-reg vectors (SMLAL/UMLAL).
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply-subtract of whole D-reg vectors (SMLSL/UMLSL).
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply of the low halves of two Q-reg vectors
;; (SMULL/UMULL).  Operand 3 selects the low half of both inputs.
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)
;; Standard widening-multiply lo pattern: build the lo-half selector and
;; defer to the insn above.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
{
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                       operands[1],
                                                       operands[2], p));
   DONE;
}
)
1655 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1656  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1657       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658                             (match_operand:VQW 1 "register_operand" "w")
1659                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1660                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1661                             (match_operand:VQW 2 "register_operand" "w")
1662                             (match_dup 3)))))]
1663   "TARGET_SIMD"
1664   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1665   [(set_attr "type" "neon_mul_<Vetype>_long")]
1668 (define_expand "vec_widen_<su>mult_hi_<mode>"
1669   [(match_operand:<VWIDE> 0 "register_operand" "")
1670    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1671    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1672  "TARGET_SIMD"
1674    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1675    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1676                                                        operands[1],
1677                                                        operands[2], p));
1678    DONE;
1683 ;; FP vector operations.
1684 ;; AArch64 AdvSIMD supports single-precision (32-bit) and 
1685 ;; double-precision (64-bit) floating-point data types and arithmetic as
1686 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
1687 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1689 ;; Floating-point operations can raise an exception.  Vectorizing such
1690 ;; operations is safe because of the reasons explained below.
1692 ;; ARMv8 permits an extension to enable trapped floating-point
1693 ;; exception handling, however this is an optional feature.  In the
1694 ;; event of a floating-point exception being raised by vectorised
1695 ;; code then:
1696 ;; 1.  If trapped floating-point exceptions are available, then a trap
1697 ;;     will be taken when any lane raises an enabled exception.  A trap
1698 ;;     handler may determine which lane raised the exception.
1699 ;; 2.  Alternatively a sticky exception flag is set in the
1700 ;;     floating-point status register (FPSR).  Software may explicitly
1701 ;;     test the exception flags, in which case the tests will either
1702 ;;     prevent vectorisation, allowing precise identification of the
1703 ;;     failing operation, or if tested outside of vectorisable regions
1704 ;;     then the specific operation and lane are not of interest.
1706 ;; FP arithmetic operations.
;; Vector FP addition (fadd) for all HF/SF/DF vector modes.
1708 (define_insn "add<mode>3"
1709  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1710        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1711                    (match_operand:VHSDF 2 "register_operand" "w")))]
1712  "TARGET_SIMD"
1713  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1714   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtraction (fsub).
1717 (define_insn "sub<mode>3"
1718  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720                     (match_operand:VHSDF 2 "register_operand" "w")))]
1721  "TARGET_SIMD"
1722  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiplication (fmul).
1726 (define_insn "mul<mode>3"
1727  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729                    (match_operand:VHSDF 2 "register_operand" "w")))]
1730  "TARGET_SIMD"
1731  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732   [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Vector FP division: first try a Newton-Raphson reciprocal
;; approximation sequence (aarch64_emit_approx_div); if that is not
;; profitable/enabled, fall through to the *div<mode>3 fdiv insn.
1735 (define_expand "div<mode>3"
1736  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738                   (match_operand:VHSDF 2 "register_operand" "w")))]
1739  "TARGET_SIMD"
1741   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1742     DONE;
1744   operands[1] = force_reg (<MODE>mode, operands[1]);
;; The plain hardware divide, used when no approximation was emitted.
1747 (define_insn "*div<mode>3"
1748  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1749        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1750                  (match_operand:VHSDF 2 "register_operand" "w")))]
1751  "TARGET_SIMD"
1752  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1753   [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negation (fneg).
1756 (define_insn "neg<mode>2"
1757  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1759  "TARGET_SIMD"
1760  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1761   [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value (fabs).
1764 (define_insn "abs<mode>2"
1765  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1766        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1767  "TARGET_SIMD"
1768  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1769   [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: 0 = 1 * 2 + 3; the accumulator (operand 3) is
;; tied to the output register, matching fmla's destructive form.
1772 (define_insn "fma<mode>4"
1773   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1774        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1775                   (match_operand:VHSDF 2 "register_operand" "w")
1776                   (match_operand:VHSDF 3 "register_operand" "0")))]
1777   "TARGET_SIMD"
1778  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1779   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMA with one input broadcast from a lane of a same-width vector
;; (fmla by element); the lane index is remapped for endianness.
1782 (define_insn "*aarch64_fma4_elt<mode>"
1783   [(set (match_operand:VDQF 0 "register_operand" "=w")
1784     (fma:VDQF
1785       (vec_duplicate:VDQF
1786         (vec_select:<VEL>
1787           (match_operand:VDQF 1 "register_operand" "<h_con>")
1788           (parallel [(match_operand:SI 2 "immediate_operand")])))
1789       (match_operand:VDQF 3 "register_operand" "w")
1790       (match_operand:VDQF 4 "register_operand" "0")))]
1791   "TARGET_SIMD"
1792   {
1793     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1794     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1795   }
1796   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above but the lane comes from the opposite-width vector mode
;; (e.g. a V2SF lane feeding a V4SF fmla).
1799 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1800   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1801     (fma:VDQSF
1802       (vec_duplicate:VDQSF
1803         (vec_select:<VEL>
1804           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1805           (parallel [(match_operand:SI 2 "immediate_operand")])))
1806       (match_operand:VDQSF 3 "register_operand" "w")
1807       (match_operand:VDQSF 4 "register_operand" "0")))]
1808   "TARGET_SIMD"
1809   {
1810     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1811     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1812   }
1813   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMA with one input duplicated from a scalar register; emitted as
;; an fmla by element using lane 0.
1816 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1817   [(set (match_operand:VMUL 0 "register_operand" "=w")
1818     (fma:VMUL
1819       (vec_duplicate:VMUL
1820           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1821       (match_operand:VMUL 2 "register_operand" "w")
1822       (match_operand:VMUL 3 "register_operand" "0")))]
1823   "TARGET_SIMD"
1824   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1825   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fma taking one multiplicand from a V2DF lane.
1828 (define_insn "*aarch64_fma4_elt_to_64v2df"
1829   [(set (match_operand:DF 0 "register_operand" "=w")
1830     (fma:DF
1831         (vec_select:DF
1832           (match_operand:V2DF 1 "register_operand" "w")
1833           (parallel [(match_operand:SI 2 "immediate_operand")]))
1834       (match_operand:DF 3 "register_operand" "w")
1835       (match_operand:DF 4 "register_operand" "0")))]
1836   "TARGET_SIMD"
1837   {
1838     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1839     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1840   }
1841   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: 0 = -(1) * 2 + 3, i.e. 3 - 1 * 2 (fmls).
1844 (define_insn "fnma<mode>4"
1845   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1846         (fma:VHSDF
1847           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1848           (match_operand:VHSDF 2 "register_operand" "w")
1849           (match_operand:VHSDF 3 "register_operand" "0")))]
1850   "TARGET_SIMD"
1851   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1852   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; fmls by element: one multiplicand broadcast from a lane of a
;; same-width vector; lane index endian-remapped.
1855 (define_insn "*aarch64_fnma4_elt<mode>"
1856   [(set (match_operand:VDQF 0 "register_operand" "=w")
1857     (fma:VDQF
1858       (neg:VDQF
1859         (match_operand:VDQF 3 "register_operand" "w"))
1860       (vec_duplicate:VDQF
1861         (vec_select:<VEL>
1862           (match_operand:VDQF 1 "register_operand" "<h_con>")
1863           (parallel [(match_operand:SI 2 "immediate_operand")])))
1864       (match_operand:VDQF 4 "register_operand" "0")))]
1865   "TARGET_SIMD"
1866   {
1867     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1868     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1869   }
1870   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above but the lane comes from the opposite-width vector mode.
1873 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1874   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1875     (fma:VDQSF
1876       (neg:VDQSF
1877         (match_operand:VDQSF 3 "register_operand" "w"))
1878       (vec_duplicate:VDQSF
1879         (vec_select:<VEL>
1880           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1881           (parallel [(match_operand:SI 2 "immediate_operand")])))
1882       (match_operand:VDQSF 4 "register_operand" "0")))]
1883   "TARGET_SIMD"
1884   {
1885     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1886     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1887   }
1888   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; fmls with one input duplicated from a scalar register (lane 0).
1891 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1892   [(set (match_operand:VMUL 0 "register_operand" "=w")
1893     (fma:VMUL
1894       (neg:VMUL
1895         (match_operand:VMUL 2 "register_operand" "w"))
1896       (vec_duplicate:VMUL
1897         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1898       (match_operand:VMUL 3 "register_operand" "0")))]
1899   "TARGET_SIMD"
1900   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1901   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fmls taking one multiplicand from a V2DF lane (here the
;; neg wraps the other multiplicand; fma is commutative in 1 and 2).
1904 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1905   [(set (match_operand:DF 0 "register_operand" "=w")
1906     (fma:DF
1907       (vec_select:DF
1908         (match_operand:V2DF 1 "register_operand" "w")
1909         (parallel [(match_operand:SI 2 "immediate_operand")]))
1910       (neg:DF
1911         (match_operand:DF 3 "register_operand" "w"))
1912       (match_operand:DF 4 "register_operand" "0")))]
1913   "TARGET_SIMD"
1914   {
1915     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1916     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1917   }
1918   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1921 ;; Vector versions of the floating-point frint patterns.
1922 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1923 (define_insn "<frint_pattern><mode>2"
1924   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1925         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1926                        FRINT))]
1927   "TARGET_SIMD"
1928   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1929   [(set_attr "type" "neon_fp_round_<stype><q>")]
1932 ;; Vector versions of the fcvt standard patterns.
1933 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per FCVT's rounding mode) then convert to [un]signed integer.
1934 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1935   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1936         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1937                                [(match_operand:VHSDF 1 "register_operand" "w")]
1938                                FCVT)))]
1939   "TARGET_SIMD"
1940   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1941   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1944 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI round-and-convert; needs the F16 instruction set.
1945 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1946   [(set (match_operand:HI 0 "register_operand" "=w")
1947         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1948                       FCVT)))]
1949   "TARGET_SIMD_F16INST"
1950   "fcvt<frint_suffix><su>\t%h0, %h1"
1951   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating conversion (fcvtz[su]).
1954 (define_insn "<optab>_trunchfhi2"
1955   [(set (match_operand:HI 0 "register_operand" "=w")
1956         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1957   "TARGET_SIMD_F16INST"
1958   "fcvtz<su>\t%h0, %h1"
1959   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF [un]signed integer-to-float conversion.
1962 (define_insn "<optab>hihf2"
1963   [(set (match_operand:HF 0 "register_operand" "=w")
1964         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1965   "TARGET_SIMD_F16INST"
1966   "<su_optab>cvtf\t%h0, %h1"
1967   [(set_attr "type" "neon_int_to_fp_s")]
;; Combine (x * 2^n) followed by truncating convert into a single
;; fixed-point fcvtz[su] with #fbits, when the multiplier is a
;; power of two within the element's bit width.
1970 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1971   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1972         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1973                                [(mult:VDQF
1974          (match_operand:VDQF 1 "register_operand" "w")
1975          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1976                                UNSPEC_FRINTZ)))]
1977   "TARGET_SIMD
1978    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1979                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1980   {
1981     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1982     char buf[64];
1983     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1984     output_asm_insn (buf, operands);
1985     return "";
1986   }
1987   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard fix/fixuns pattern: truncate toward zero then convert.
1990 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1991   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1992         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1993                                [(match_operand:VHSDF 1 "register_operand")]
1994                                 UNSPEC_FRINTZ)))]
1995   "TARGET_SIMD"
1996   {})
;; fix_trunc/fixuns_trunc spelling of the same conversion.
1998 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1999   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2000         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001                                [(match_operand:VHSDF 1 "register_operand")]
2002                                 UNSPEC_FRINTZ)))]
2003   "TARGET_SIMD"
2004   {})
;; ftrunc: round toward zero, keeping the FP mode.
2006 (define_expand "ftrunc<VHSDF:mode>2"
2007   [(set (match_operand:VHSDF 0 "register_operand")
2008         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2009                        UNSPEC_FRINTZ))]
2010   "TARGET_SIMD"
2011   {})
;; [un]signed integer vector -> FP vector conversion ([us]cvtf).
2013 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2014   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2015         (FLOATUORS:VHSDF
2016           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2017   "TARGET_SIMD"
2018   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2019   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2022 ;; Conversions between vectors of floats and doubles.
2023 ;; Contains a mix of patterns to match standard pattern names
2024 ;; and those for intrinsics.
2026 ;; Float widening operations.
;; Widen the low half of a HF/SF vector to the next wider FP mode
;; (fcvtl); operand 2 is the low-half lane selector.
2028 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2029   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2030         (float_extend:<VWIDE> (vec_select:<VHALF>
2031                                (match_operand:VQ_HSF 1 "register_operand" "w")
2032                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2033                             )))]
2034   "TARGET_SIMD"
2035   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2036   [(set_attr "type" "neon_fp_cvt_widen_s")]
2039 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #fbits fractional bits (fcvtz[su] #imm).
2041 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2042   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2043         (unspec:<VHSDF:FCVT_TARGET>
2044           [(match_operand:VHSDF 1 "register_operand" "w")
2045            (match_operand:SI 2 "immediate_operand" "i")]
2046          FCVT_F2FIXED))]
2047   "TARGET_SIMD"
2048   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2049   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #fbits fractional bits ([us]cvtf #imm).
2052 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2053   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2054         (unspec:<VDQ_HSDI:FCVT_TARGET>
2055           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2056            (match_operand:SI 2 "immediate_operand" "i")]
2057          FCVT_FIXED2F))]
2058   "TARGET_SIMD"
2059   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2060   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2063 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2064 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2065 ;; the meaning of HI and LO changes depending on the target endianness.
2066 ;; While elsewhere we map the higher numbered elements of a vector to
2067 ;; the lower architectural lanes of the vector, for these patterns we want
2068 ;; to always treat "hi" as referring to the higher architectural lanes.
2069 ;; Consequently, while the patterns below look inconsistent with our
2070 ;; other big-endian patterns their behavior is as required.
;; Standard pattern: widen the low half.  Builds the lo-half lane
;; selector (high_p = false) and emits the fcvtl insn above.
2072 (define_expand "vec_unpacks_lo_<mode>"
2073   [(match_operand:<VWIDE> 0 "register_operand" "")
2074    (match_operand:VQ_HSF 1 "register_operand" "")]
2075   "TARGET_SIMD"
2076   {
2077     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2078     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2079                                                        operands[1], p));
2080     DONE;
2081   }
;; Widen the high half of a HF/SF vector (fcvtl2); operand 2 is the
;; hi-half lane selector.
2084 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2085   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2086         (float_extend:<VWIDE> (vec_select:<VHALF>
2087                                (match_operand:VQ_HSF 1 "register_operand" "w")
2088                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2089                             )))]
2090   "TARGET_SIMD"
2091   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2092   [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard pattern: widen the high half.  Build the hi-half lane
;; selector (high_p = true) and emit the matching *_hi_ insn
;; (fcvtl2) — emitting the lo variant here would widen the wrong
;; half and reject the hi-half parallel.
2095 (define_expand "vec_unpacks_hi_<mode>"
2096   [(match_operand:<VWIDE> 0 "register_operand" "")
2097    (match_operand:VQ_HSF 1 "register_operand" "")]
2098   "TARGET_SIMD"
2099   {
2100     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2101     emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2102                                                        operands[1], p));
2103     DONE;
2104   }
;; Widen a 64-bit FP vector to the full-width mode (fcvtl).
2106 (define_insn "aarch64_float_extend_lo_<Vwide>"
2107   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2108         (float_extend:<VWIDE>
2109           (match_operand:VDF 1 "register_operand" "w")))]
2110   "TARGET_SIMD"
2111   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2112   [(set_attr "type" "neon_fp_cvt_widen_s")]
2115 ;; Float narrowing operations.
;; Narrow a wide FP vector into a 64-bit result (fcvtn).
2117 (define_insn "aarch64_float_truncate_lo_<mode>"
2118   [(set (match_operand:VDF 0 "register_operand" "=w")
2119       (float_truncate:VDF
2120         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2121   "TARGET_SIMD"
2122   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2123   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; fcvtn2, little-endian form: the narrowed half is concatenated
;; after the preserved low half (operand 1 tied to the output).
2126 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2127   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2128     (vec_concat:<VDBL>
2129       (match_operand:VDF 1 "register_operand" "0")
2130       (float_truncate:VDF
2131         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2132   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2133   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2134   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; fcvtn2, big-endian form: vec_concat operand order is swapped.
2137 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2138   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2139     (vec_concat:<VDBL>
2140       (float_truncate:VDF
2141         (match_operand:<VWIDE> 2 "register_operand" "w"))
2142       (match_operand:VDF 1 "register_operand" "0")))]
2143   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2144   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2145   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le/_be insn according to target endianness.
2148 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2149   [(match_operand:<VDBL> 0 "register_operand" "=w")
2150    (match_operand:VDF 1 "register_operand" "0")
2151    (match_operand:<VWIDE> 2 "register_operand" "w")]
2152   "TARGET_SIMD"
2154   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2155                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2156                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2157   emit_insn (gen (operands[0], operands[1], operands[2]));
2158   DONE;
;; Pack two V2DF inputs into one V4SF: narrow one into the low half
;; (fcvtn) and the other into the high half (fcvtn2); which input is
;; "lo" depends on endianness.
2162 (define_expand "vec_pack_trunc_v2df"
2163   [(set (match_operand:V4SF 0 "register_operand")
2164       (vec_concat:V4SF
2165         (float_truncate:V2SF
2166             (match_operand:V2DF 1 "register_operand"))
2167         (float_truncate:V2SF
2168             (match_operand:V2DF 2 "register_operand"))
2169           ))]
2170   "TARGET_SIMD"
2171   {
2172     rtx tmp = gen_reg_rtx (V2SFmode);
2173     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2174     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2176     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2177     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2178                                                    tmp, operands[hi]));
2179     DONE;
2180   }
;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two
;; inputs via move_lo/hi_quad, then narrow it with fcvtn.
2183 (define_expand "vec_pack_trunc_df"
2184   [(set (match_operand:V2SF 0 "register_operand")
2185       (vec_concat:V2SF
2186         (float_truncate:SF
2187             (match_operand:DF 1 "register_operand"))
2188         (float_truncate:SF
2189             (match_operand:DF 2 "register_operand"))
2190           ))]
2191   "TARGET_SIMD"
2192   {
2193     rtx tmp = gen_reg_rtx (V2SFmode);
2194     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2195     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2197     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2198     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2199     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2200     DONE;
2201   }
2204 ;; FP Max/Min
2205 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2206 ;; expression like:
2207 ;;      a = (b < c) ? b : c;
2208 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2209 ;; either explicitly or indirectly via -ffast-math.
2211 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2212 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2213 ;; operand will be returned when both operands are zero (i.e. they may not
2214 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2215 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2216 ;; NaNs.
;; smax/smin standard patterns, mapped to the NaN-propagation-defined
;; fmaxnm/fminnm forms (see the comment block above).
2218 (define_insn "<su><maxmin><mode>3"
2219   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2220         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2221                        (match_operand:VHSDF 2 "register_operand" "w")))]
2222   "TARGET_SIMD"
2223   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2224   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2227 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2228 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2229 ;; which implement the IEEE fmax ()/fmin () functions.
2230 (define_insn "<maxmin_uns><mode>3"
2231   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2232        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2233                       (match_operand:VHSDF 2 "register_operand" "w")]
2234                       FMAXMIN_UNS))]
2235   "TARGET_SIMD"
2236   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2237   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2240 ;; 'across lanes' add.
;; Scalar result of an across-lanes integer add: reduce into a vector
;; scratch, then extract (endian-adjusted) lane 0.
2242 (define_expand "reduc_plus_scal_<mode>"
2243   [(match_operand:<VEL> 0 "register_operand" "=w")
2244    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2245                UNSPEC_ADDV)]
2246   "TARGET_SIMD"
2247   {
2248     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2249     rtx scratch = gen_reg_rtx (<MODE>mode);
2250     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2251     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2252     DONE;
2253   }
;; Pairwise FP add of two vectors (faddp).
2256 (define_insn "aarch64_faddp<mode>"
2257  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2258        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2259                       (match_operand:VHSDF 2 "register_operand" "w")]
2260         UNSPEC_FADDV))]
2261  "TARGET_SIMD"
2262  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2263   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Across-lanes integer add (addv/addp) leaving the sum in a lane.
2266 (define_insn "aarch64_reduc_plus_internal<mode>"
2267  [(set (match_operand:VDQV 0 "register_operand" "=w")
2268        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2269                     UNSPEC_ADDV))]
2270  "TARGET_SIMD"
2271  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2272   [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no addv form; a single pairwise add reduces both lanes.
2275 (define_insn "aarch64_reduc_plus_internalv2si"
2276  [(set (match_operand:V2SI 0 "register_operand" "=w")
2277        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2278                     UNSPEC_ADDV))]
2279  "TARGET_SIMD"
2280  "addp\\t%0.2s, %1.2s, %1.2s"
2281   [(set_attr "type" "neon_reduc_add")]
;; Two-lane FP vectors reduce with a single scalar faddp.
2284 (define_insn "reduc_plus_scal_<mode>"
2285  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2286        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2287                    UNSPEC_FADDV))]
2288  "TARGET_SIMD"
2289  "faddp\\t%<Vetype>0, %1.<Vtype>"
2290   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two rounds of pairwise adds, then extract lane 0.
2293 (define_expand "reduc_plus_scal_v4sf"
2294  [(set (match_operand:SF 0 "register_operand")
2295        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2296                     UNSPEC_FADDV))]
2297  "TARGET_SIMD"
2299   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2300   rtx scratch = gen_reg_rtx (V4SFmode);
2301   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2302   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2303   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2304   DONE;
;; Count leading sign bits per element (cls).
2307 (define_insn "clrsb<mode>2"
2308   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2309         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2310   "TARGET_SIMD"
2311   "cls\\t%0.<Vtype>, %1.<Vtype>"
2312   [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element (clz).
2315 (define_insn "clz<mode>2"
2316  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2317        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2318  "TARGET_SIMD"
2319  "clz\\t%0.<Vtype>, %1.<Vtype>"
2320   [(set_attr "type" "neon_cls<q>")]
;; Population count per byte element (cnt).
2323 (define_insn "popcount<mode>2"
2324   [(set (match_operand:VB 0 "register_operand" "=w")
2325         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2326   "TARGET_SIMD"
2327   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2328   [(set_attr "type" "neon_cnt<q>")]
2331 ;; 'across lanes' max and min ops.
2333 ;; Template for outputting a scalar, so we can create __builtins which can be
2334 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2335 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2336   [(match_operand:<VEL> 0 "register_operand")
2337    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2338                   FMAXMINV)]
2339   "TARGET_SIMD"
2340   {
2341     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2342     rtx scratch = gen_reg_rtx (<MODE>mode);
2343     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2344                                                               operands[1]));
2345     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2346     DONE;
2347   }
2350 ;; Likewise for integer cases, signed and unsigned.
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2352   [(match_operand:<VEL> 0 "register_operand")
2353    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2354                     MAXMINV)]
2355   "TARGET_SIMD"
2356   {
2357     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358     rtx scratch = gen_reg_rtx (<MODE>mode);
2359     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2360                                                               operands[1]));
2361     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2362     DONE;
2363   }
;; Across-lanes integer max/min ([us]maxv/[us]minv).
2366 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2367  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2368        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2369                     MAXMINV))]
2370  "TARGET_SIMD"
2371  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2372   [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op reduces both lanes.
2375 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2376  [(set (match_operand:V2SI 0 "register_operand" "=w")
2377        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2378                     MAXMINV))]
2379  "TARGET_SIMD"
2380  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2381   [(set_attr "type" "neon_reduc_minmax")]
;; Across-lanes FP max/min (fmax[nm]v/fmin[nm]v or pairwise form).
2384 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2385  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2387                       FMAXMINV))]
2388  "TARGET_SIMD"
2389  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2390   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2393 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2394 ;; allocation.
2395 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2396 ;; to select.
2398 ;; Thus our BSL is of the form:
2399 ;;   op0 = bsl (mask, op2, op3)
2400 ;; We can use any of:
2402 ;;   if (op0 = mask)
2403 ;;     bsl mask, op1, op2
2404 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2405 ;;     bit op0, op2, mask
2406 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2407 ;;     bif op0, op1, mask
2409 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2410 ;; Some forms of straight-line code may generate the equivalent form
2411 ;; in *aarch64_simd_bsl<mode>_alt.
2413 (define_insn "aarch64_simd_bsl<mode>_internal"
2414   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2415         (xor:VDQ_I
2416            (and:VDQ_I
2417              (xor:VDQ_I
2418                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2419                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2420              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2421           (match_dup:<V_INT_EQUIV> 3)
2422         ))]
2423   "TARGET_SIMD"
2424   "@
2425   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2426   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2427   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2428   [(set_attr "type" "neon_bsl<q>")]
2431 ;; We need this form in addition to the above pattern to match the case
2432 ;; when combine tries merging three insns such that the second operand of
2433 ;; the outer XOR matches the second operand of the inner XOR rather than
2434 ;; the first.  The two are equivalent but since recog doesn't try all
2435 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same bit-select computation as aarch64_simd_bsl<mode>_internal, but with
;; the operands of the inner/outer XOR commuted: here operand 2 is the value
;; that appears in both XORs.  Needed because recog does not try all operand
;; permutations of commutative operations (see comment above).
2437 (define_insn "*aarch64_simd_bsl<mode>_alt"
2438   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2439         (xor:VDQ_I
2440            (and:VDQ_I
2441              (xor:VDQ_I
2442                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2443                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2444               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2445           (match_dup:<V_INT_EQUIV> 2)))]
2446   "TARGET_SIMD"
2447   "@
2448   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2449   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2450   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2451   [(set_attr "type" "neon_bsl<q>")]
2454 ;; DImode is special, we want to avoid computing operations which are
2455 ;; more naturally computed in general purpose registers in the vector
2456 ;; registers.  If we do that, we need to move all three operands from general
2457 ;; purpose registers to vector registers, then back again.  However, we
2458 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2459 ;; optimizations based on the component operations of a BSL.
2461 ;; That means we need a splitter back to the individual operations, if they
2462 ;; would be better calculated on the integer side.
;; DImode bit-select.  A fourth, all-GP alternative ("&r" destination,
;; "r" inputs, length 12) lets the allocator keep everything on the integer
;; side; it is emitted as "#" and split back to eor/and/eor below.
2464 (define_insn_and_split "aarch64_simd_bsldi_internal"
2465   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2466         (xor:DI
2467            (and:DI
2468              (xor:DI
2469                (match_operand:DI 3 "register_operand" "w,0,w,r")
2470                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2471              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2472           (match_dup:DI 3)
2473         ))]
2474   "TARGET_SIMD"
2475   "@
2476   bsl\\t%0.8b, %2.8b, %3.8b
2477   bit\\t%0.8b, %2.8b, %1.8b
2478   bif\\t%0.8b, %3.8b, %1.8b
2479   #"
2480   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
;; Placeholder split template: the C body below emits the real insns and
;; finishes with DONE, so this pattern list is never instantiated.
2481   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2483   /* Split back to individual operations.  If we're before reload, and
2484      able to create a temporary register, do so.  If we're after reload,
2485      we've got an early-clobber destination register, so use that.
2486      Otherwise, we can't create pseudos and we can't yet guarantee that
2487      operands[0] is safe to write, so FAIL to split.  */
2489   rtx scratch;
2490   if (reload_completed)
2491     scratch = operands[0];
2492   else if (can_create_pseudo_p ())
2493     scratch = gen_reg_rtx (DImode);
2494   else
2495     FAIL;
2497   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2498   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2499   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2500   DONE;
2502   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2503    (set_attr "length" "4,4,4,12")]
;; Commuted-XOR form of aarch64_simd_bsldi_internal (operand 2 repeated in
;; the outer XOR instead of operand 3), with the same all-GP alternative
;; and eor/and/eor split.
2506 (define_insn_and_split "aarch64_simd_bsldi_alt"
2507   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2508         (xor:DI
2509            (and:DI
2510              (xor:DI
2511                (match_operand:DI 3 "register_operand" "w,w,0,r")
2512                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2513              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2514           (match_dup:DI 2)
2515         ))]
2516   "TARGET_SIMD"
2517   "@
2518   bsl\\t%0.8b, %3.8b, %2.8b
2519   bit\\t%0.8b, %3.8b, %1.8b
2520   bif\\t%0.8b, %2.8b, %1.8b
2521   #"
2522   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
;; Placeholder split template: the C body emits the real insns and calls DONE.
2523   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2525   /* Split back to individual operations.  If we're before reload, and
2526      able to create a temporary register, do so.  If we're after reload,
2527      we've got an early-clobber destination register, so use that.
2528      Otherwise, we can't create pseudos and we can't yet guarantee that
2529      operands[0] is safe to write, so FAIL to split.  */
2531   rtx scratch;
2532   if (reload_completed)
2533     scratch = operands[0];
2534   else if (can_create_pseudo_p ())
2535     scratch = gen_reg_rtx (DImode);
2536   else
2537     FAIL;
2539   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2540   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2541   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2542   DONE;
2544   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2545    (set_attr "length" "4,4,4,12")]
;; Public bit-select expander.  The internal pattern works on the integer
;; equivalent mode, so float-mode operands are viewed as integers via
;; gen_lowpart and the result moved back through a fresh integer-mode temp.
2548 (define_expand "aarch64_simd_bsl<mode>"
2549   [(match_operand:VALLDIF 0 "register_operand")
2550    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2551    (match_operand:VALLDIF 2 "register_operand")
2552    (match_operand:VALLDIF 3 "register_operand")]
2553  "TARGET_SIMD"
2555   /* We can't alias operands together if they have different modes.  */
2556   rtx tmp = operands[0];
2557   if (FLOAT_MODE_P (<MODE>mode))
2558     {
2559       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2560       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2561       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2562     }
2563   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2564   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2565                                                          operands[1],
2566                                                          operands[2],
2567                                                          operands[3]));
2568   if (tmp != operands[0])
2569     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2571   DONE;
;; Select between operands 1 and 2 under the mask in operand 3.
;; The -1/0 and 0/-1 constant cases degenerate to a move or a NOT of the
;; mask; everything else is a bit-select via aarch64_simd_bsl.
2574 (define_expand "vcond_mask_<mode><v_int_equiv>"
2575   [(match_operand:VALLDI 0 "register_operand")
2576    (match_operand:VALLDI 1 "nonmemory_operand")
2577    (match_operand:VALLDI 2 "nonmemory_operand")
2578    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2579   "TARGET_SIMD"
2581   /* If we have (a = (P) ? -1 : 0);
2582      Then we can simply move the generated mask (result must be int).  */
2583   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2584       && operands[2] == CONST0_RTX (<MODE>mode))
2585     emit_move_insn (operands[0], operands[3]);
2586   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2587   else if (operands[1] == CONST0_RTX (<MODE>mode)
2588            && operands[2] == CONSTM1_RTX (<MODE>mode))
2589     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2590   else
2591     {
2592       if (!REG_P (operands[1]))
2593         operands[1] = force_reg (<MODE>mode, operands[1]);
2594       if (!REG_P (operands[2]))
2595         operands[2] = force_reg (<MODE>mode, operands[2]);
2596       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2597                                              operands[1], operands[2]));
2598     }
2600   DONE;
2603 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask.
;; Signed compares against zero keep the zero operand (CM* has immediate-#0
;; forms); otherwise operand 3 is forced to a register.  LTU/LEU are handled
;; by swapping operands into CMGTU/CMGEU, and NE as the complement of EQ.
2605 (define_expand "vec_cmp<mode><mode>"
2606   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2607           (match_operator 1 "comparison_operator"
2608             [(match_operand:VSDQ_I_DI 2 "register_operand")
2609              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2610   "TARGET_SIMD"
2612   rtx mask = operands[0];
2613   enum rtx_code code = GET_CODE (operands[1]);
2615   switch (code)
2616     {
2617     case NE:
2618     case LE:
2619     case LT:
2620     case GE:
2621     case GT:
2622     case EQ:
2623       if (operands[3] == CONST0_RTX (<MODE>mode))
2624         break;
2626       /* Fall through.  */
2627     default:
2628       if (!REG_P (operands[3]))
2629         operands[3] = force_reg (<MODE>mode, operands[3]);
2631       break;
2632     }
2634   switch (code)
2635     {
2636     case LT:
2637       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2638       break;
2640     case GE:
2641       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2642       break;
2644     case LE:
2645       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2646       break;
2648     case GT:
2649       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2650       break;
2652     case LTU:
2653       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2654       break;
2656     case GEU:
2657       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2658       break;
2660     case LEU:
2661       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2662       break;
2664     case GTU:
2665       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2666       break;
2668     case NE:
2669       /* Handle NE as !EQ.  */
2670       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2671       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2672       break;
2674     case EQ:
2675       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2676       break;
2678     default:
2679       gcc_unreachable ();
2680     }
2682   DONE;
;; Floating-point vector compare producing an integer mask.
;; First pass: remember whether the #0.0 form is usable and pick the
;; gen_aarch64_cm* generator (swapping operands where a LT/LE must become
;; GT/GE).  Second pass: emit the compare, plus the extra NaN handling for
;; the UN* codes and the composite sequences for LTGT/ORDERED/UNORDERED/UNEQ.
2685 (define_expand "vec_cmp<mode><v_int_equiv>"
2686   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2687         (match_operator 1 "comparison_operator"
2688             [(match_operand:VDQF 2 "register_operand")
2689              (match_operand:VDQF 3 "nonmemory_operand")]))]
2690   "TARGET_SIMD"
2692   int use_zero_form = 0;
2693   enum rtx_code code = GET_CODE (operands[1]);
2694   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2696   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2698   switch (code)
2699     {
2700     case LE:
2701     case LT:
2702     case GE:
2703     case GT:
2704     case EQ:
2705       if (operands[3] == CONST0_RTX (<MODE>mode))
2706         {
2707           use_zero_form = 1;
2708           break;
2709         }
2710       /* Fall through.  */
2711     default:
2712       if (!REG_P (operands[3]))
2713         operands[3] = force_reg (<MODE>mode, operands[3]);
2715       break;
2716     }
2718   switch (code)
2719     {
2720     case LT:
2721       if (use_zero_form)
2722         {
2723           comparison = gen_aarch64_cmlt<mode>;
2724           break;
2725         }
2726       /* Fall through.  */
2727     case UNLT:
2728       std::swap (operands[2], operands[3]);
2729       /* Fall through.  */
2730     case UNGT:
2731     case GT:
2732       comparison = gen_aarch64_cmgt<mode>;
2733       break;
2734     case LE:
2735       if (use_zero_form)
2736         {
2737           comparison = gen_aarch64_cmle<mode>;
2738           break;
2739         }
2740       /* Fall through.  */
2741     case UNLE:
2742       std::swap (operands[2], operands[3]);
2743       /* Fall through.  */
2744     case UNGE:
2745     case GE:
2746       comparison = gen_aarch64_cmge<mode>;
2747       break;
2748     case NE:
2749     case EQ:
2750       comparison = gen_aarch64_cmeq<mode>;
2751       break;
2752     case UNEQ:
2753     case ORDERED:
2754     case UNORDERED:
2755     case LTGT:
2756       break;
2757     default:
2758       gcc_unreachable ();
2759     }
2761   switch (code)
2762     {
2763     case UNGE:
2764     case UNGT:
2765     case UNLE:
2766     case UNLT:
2767       {
2768         /* All of the above must not raise any FP exceptions.  Thus we first
2769            check each operand for NaNs and force any elements containing NaN to
2770            zero before using them in the compare.
2771            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2772                                      (cm<cc> (isnan (a) ? 0.0 : a,
2773                                               isnan (b) ? 0.0 : b))
2774            We use the following transformations for doing the comparisons:
2775            a UNGE b -> a GE b
2776            a UNGT b -> a GT b
2777            a UNLE b -> b GE a
2778            a UNLT b -> b GT a.  */
2780         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2781         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2782         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2783         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2784         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2785         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2786         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2787                                           lowpart_subreg (<V_INT_EQUIV>mode,
2788                                                           operands[2],
2789                                                           <MODE>mode)));
2790         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2791                                           lowpart_subreg (<V_INT_EQUIV>mode,
2792                                                           operands[3],
2793                                                           <MODE>mode)));
2794         gcc_assert (comparison != NULL);
2795         emit_insn (comparison (operands[0],
2796                                lowpart_subreg (<MODE>mode,
2797                                                tmp0, <V_INT_EQUIV>mode),
2798                                lowpart_subreg (<MODE>mode,
2799                                                tmp1, <V_INT_EQUIV>mode)));
2800         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2801       }
2802       break;
2804     case LT:
2805     case LE:
2806     case GT:
2807     case GE:
2808     case EQ:
2809     case NE:
2810       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2811          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2812          a GE b -> a GE b
2813          a GT b -> a GT b
2814          a LE b -> b GE a
2815          a LT b -> b GT a
2816          a EQ b -> a EQ b
2817          a NE b -> ~(a EQ b)  */
2818       gcc_assert (comparison != NULL);
2819       emit_insn (comparison (operands[0], operands[2], operands[3]));
2820       if (code == NE)
2821         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2822       break;
2824     case LTGT:
2825       /* LTGT is not guaranteed to not generate a FP exception.  So let's
2826          go the faster way : ((a > b) || (b > a)).  */
2827       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2828                                          operands[2], operands[3]));
2829       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2830       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2831       break;
2833     case ORDERED:
2834     case UNORDERED:
2835     case UNEQ:
2836       /* cmeq (a, a) & cmeq (b, b).  */
2837       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2838                                          operands[2], operands[2]));
2839       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2840       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2842       if (code == UNORDERED)
2843         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2844       else if (code == UNEQ)
2845         {
2846           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2847           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2848         }
2849       break;
2851     default:
2852       gcc_unreachable ();
2853     }
2855   DONE;
;; Unsigned integer vector compare.  The signed expander already dispatches
;; on the rtx code (GTU/GEU/LTU/LEU included), so simply delegate to it.
2858 (define_expand "vec_cmpu<mode><mode>"
2859   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2860           (match_operator 1 "comparison_operator"
2861             [(match_operand:VSDQ_I_DI 2 "register_operand")
2862              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2863   "TARGET_SIMD"
2865   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2866                                       operands[2], operands[3]));
2867   DONE;
;; vcond: op0 = (op4 <op3> op5) ? op1 : op2, lowered to vec_cmp followed by
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped so vec_cmp
;; does not need to emit the extra NOT it otherwise uses for NE.
2870 (define_expand "vcond<mode><mode>"
2871   [(set (match_operand:VALLDI 0 "register_operand")
2872         (if_then_else:VALLDI
2873           (match_operator 3 "comparison_operator"
2874             [(match_operand:VALLDI 4 "register_operand")
2875              (match_operand:VALLDI 5 "nonmemory_operand")])
2876           (match_operand:VALLDI 1 "nonmemory_operand")
2877           (match_operand:VALLDI 2 "nonmemory_operand")))]
2878   "TARGET_SIMD"
2880   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2881   enum rtx_code code = GET_CODE (operands[3]);
2883   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2884      it as well as switch operands 1/2 in order to avoid the additional
2885      NOT instruction.  */
2886   if (code == NE)
2887     {
2888       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2889                                     operands[4], operands[5]);
2890       std::swap (operands[1], operands[2]);
2891     }
2892   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2893                                              operands[4], operands[5]));
2894   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2895                                                  operands[2], mask));
2897   DONE;
;; Mixed-mode vcond: the comparison is done in VDQF_COND mode while the
;; selected data are in the matching <V_cmp_mixed> mode.  Same NE-to-EQ
;; swap trick as vcond<mode><mode> above.
2900 (define_expand "vcond<v_cmp_mixed><mode>"
2901   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2902         (if_then_else:<V_cmp_mixed>
2903           (match_operator 3 "comparison_operator"
2904             [(match_operand:VDQF_COND 4 "register_operand")
2905              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2906           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2907           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2908   "TARGET_SIMD"
2910   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2911   enum rtx_code code = GET_CODE (operands[3]);
2913   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2914      it as well as switch operands 1/2 in order to avoid the additional
2915      NOT instruction.  */
2916   if (code == NE)
2917     {
2918       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2919                                     operands[4], operands[5]);
2920       std::swap (operands[1], operands[2]);
2921     }
2922   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2923                                              operands[4], operands[5]));
2924   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2925                                                 operands[0], operands[1],
2926                                                 operands[2], mask));
2928   DONE;
;; Unsigned vcond on integer vectors: vec_cmp (which handles the unsigned
;; codes) followed by vcond_mask, with the same NE-to-EQ swap optimization.
2931 (define_expand "vcondu<mode><mode>"
2932   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2933         (if_then_else:VSDQ_I_DI
2934           (match_operator 3 "comparison_operator"
2935             [(match_operand:VSDQ_I_DI 4 "register_operand")
2936              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2937           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2938           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2939   "TARGET_SIMD"
2941   rtx mask = gen_reg_rtx (<MODE>mode);
2942   enum rtx_code code = GET_CODE (operands[3]);
2944   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2945      it as well as switch operands 1/2 in order to avoid the additional
2946      NOT instruction.  */
2947   if (code == NE)
2948     {
2949       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2950                                     operands[4], operands[5]);
2951       std::swap (operands[1], operands[2]);
2952     }
2953   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2954                                       operands[4], operands[5]));
2955   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2956                                                  operands[2], mask));
2957   DONE;
;; Unsigned comparison in <V_cmp_mixed> integer mode selecting between
;; float-mode (VDQF) values; mask feeds the float-mode vcond_mask.
2960 (define_expand "vcondu<mode><v_cmp_mixed>"
2961   [(set (match_operand:VDQF 0 "register_operand")
2962         (if_then_else:VDQF
2963           (match_operator 3 "comparison_operator"
2964             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2965              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2966           (match_operand:VDQF 1 "nonmemory_operand")
2967           (match_operand:VDQF 2 "nonmemory_operand")))]
2968   "TARGET_SIMD"
2970   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2971   enum rtx_code code = GET_CODE (operands[3]);
2973   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974      it as well as switch operands 1/2 in order to avoid the additional
2975      NOT instruction.  */
2976   if (code == NE)
2977     {
2978       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979                                     operands[4], operands[5]);
2980       std::swap (operands[1], operands[2]);
2981     }
2982   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2983                                                   mask, operands[3],
2984                                                   operands[4], operands[5]));
2985   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2986                                                  operands[2], mask));
2987   DONE;
2990 ;; Patterns for AArch64 SIMD Intrinsics.
2992 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract one lane and sign-extend it into a general-purpose
;; register.  The lane index is flipped for big-endian by
;; aarch64_endian_lane_rtx before the template is printed.
2993 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2994   [(set (match_operand:GPI 0 "register_operand" "=r")
2995         (sign_extend:GPI
2996           (vec_select:<VEL>
2997             (match_operand:VDQQH 1 "register_operand" "w")
2998             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2999   "TARGET_SIMD"
3000   {
3001     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3002     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3003   }
3004   [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: extract one lane and zero-extend it to SImode in a GP register,
;; with the same endian lane-number adjustment as the sign-extend pattern.
3007 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3008   [(set (match_operand:SI 0 "register_operand" "=r")
3009         (zero_extend:SI
3010           (vec_select:<VEL>
3011             (match_operand:VDQQH 1 "register_operand" "w")
3012             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3013   "TARGET_SIMD"
3014   {
3015     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3016     return "umov\\t%w0, %1.<Vetype>[%2]";
3017   }
3018   [(set_attr "type" "neon_to_gp<q>")]
3021 ;; Lane extraction of a value, neither sign nor zero extension
3022 ;; is guaranteed so upper bits should be considered undefined.
3023 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Generic lane extraction to a GP register (umov), another SIMD register
;; (dup) or memory (st1), chosen by which_alternative.
3024 (define_insn "aarch64_get_lane<mode>"
3025   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3026         (vec_select:<VEL>
3027           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3028           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3029   "TARGET_SIMD"
3030   {
3031     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3032     switch (which_alternative)
3033       {
3034         case 0:
3035           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3036         case 1:
3037           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3038         case 2:
3039           return "st1\\t{%1.<Vetype>}[%2], %0";
3040         default:
3041           gcc_unreachable ();
3042       }
3043   }
3044   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Concatenation of two adjacent 64-bit loads done as a single Q-register
;; ldr.  The condition requires operand 2's address to be exactly operand
;; 1's address plus the mode size, so one 128-bit load covers both halves.
3047 (define_insn "load_pair_lanes<mode>"
3048   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3049         (vec_concat:<VDBL>
3050            (match_operand:VDC 1 "memory_operand" "Utq")
3051            (match_operand:VDC 2 "memory_operand" "m")))]
3052   "TARGET_SIMD && !STRICT_ALIGNMENT
3053    && rtx_equal_p (XEXP (operands[2], 0),
3054                    plus_constant (Pmode,
3055                                   XEXP (operands[1], 0),
3056                                   GET_MODE_SIZE (<MODE>mode)))"
3057   "ldr\\t%q0, %1"
3058   [(set_attr "type" "neon_load1_1reg_q")]
;; Store a 128-bit concatenation of two 64-bit values with a single stp,
;; from either SIMD (d) or general-purpose (x) registers.
3061 (define_insn "store_pair_lanes<mode>"
3062   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3063         (vec_concat:<VDBL>
3064            (match_operand:VDC 1 "register_operand" "w, r")
3065            (match_operand:VDC 2 "register_operand" "w, r")))]
3066   "TARGET_SIMD"
3067   "@
3068    stp\\t%d1, %d2, %y0
3069    stp\\t%x1, %x2, %y0"
3070   [(set_attr "type" "neon_stp, store_16")]
3073 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3074 ;; dest vector.
;; Little-endian combine with a zero high half: since the upper 64 bits of
;; the result are zero, a plain 64-bit mov/fmov/ldr into the low half is
;; sufficient (these implicitly clear the top of the Q register).
3076 (define_insn "*aarch64_combinez<mode>"
3077   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3078         (vec_concat:<VDBL>
3079           (match_operand:VDC 1 "general_operand" "w,?r,m")
3080           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3081   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3082   "@
3083    mov\\t%0.8b, %1.8b
3084    fmov\t%d0, %1
3085    ldr\\t%d0, %1"
3086   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3087    (set_attr "simd" "yes,*,yes")
3088    (set_attr "fp" "*,yes,*")]
;; Big-endian counterpart of *aarch64_combinez: the zero constant appears
;; as the first vec_concat element, but the emitted code is identical.
3091 (define_insn "*aarch64_combinez_be<mode>"
3092   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3093         (vec_concat:<VDBL>
3094           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3095           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3096   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3097   "@
3098    mov\\t%0.8b, %1.8b
3099    fmov\t%d0, %1
3100    ldr\\t%d0, %1"
3101   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3102    (set_attr "simd" "yes,*,yes")
3103    (set_attr "fp" "*,yes,*")]
;; Combine two 64-bit vectors into one 128-bit vector; the endian-dependent
;; half placement is handled in aarch64_split_simd_combine.
3106 (define_expand "aarch64_combine<mode>"
3107   [(match_operand:<VDBL> 0 "register_operand")
3108    (match_operand:VDC 1 "register_operand")
3109    (match_operand:VDC 2 "register_operand")]
3110   "TARGET_SIMD"
3112   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3114   DONE;
;; Build the double-width result by writing the low and high quad halves
;; separately via the move_lo_quad/move_hi_quad patterns.
3118 (define_expand "aarch64_simd_combine<mode>"
3119   [(match_operand:<VDBL> 0 "register_operand")
3120    (match_operand:VDC 1 "register_operand")
3121    (match_operand:VDC 2 "register_operand")]
3122   "TARGET_SIMD"
3123   {
3124     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3125     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3126     DONE;
3127   }
3128 [(set_attr "type" "multiple")]
3131 ;; <su><addsub>l<q>.
;; Widening add/sub of the HIGH halves of two Q registers
;; ({s,u}{add,sub}l2): both inputs are narrowed by the same hi-half
;; vec_select, extended, then added/subtracted into the wide mode.
3133 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3134  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3135        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3136                            (match_operand:VQW 1 "register_operand" "w")
3137                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3138                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3139                            (match_operand:VQW 2 "register_operand" "w")
3140                            (match_dup 3)))))]
3141   "TARGET_SIMD"
3142   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3143   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the LOW halves of two Q registers
;; ({s,u}{add,sub}l), using the lo-half selector instead of the hi-half.
3146 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3147  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3149                            (match_operand:VQW 1 "register_operand" "w")
3150                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3151                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3152                            (match_operand:VQW 2 "register_operand" "w")
3153                            (match_dup 3)))))]
3154   "TARGET_SIMD"
3155   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3156   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; saddl2 builtin expander: build the hi-half lane selector and delegate to
;; the generic _hi_internal pattern.
3160 (define_expand "aarch64_saddl2<mode>"
3161   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3162    (match_operand:VQW 1 "register_operand" "w")
3163    (match_operand:VQW 2 "register_operand" "w")]
3164   "TARGET_SIMD"
3166   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3167   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3168                                                   operands[2], p));
3169   DONE;
;; uaddl2 builtin expander: hi-half selector + generic _hi_internal pattern.
3172 (define_expand "aarch64_uaddl2<mode>"
3173   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3174    (match_operand:VQW 1 "register_operand" "w")
3175    (match_operand:VQW 2 "register_operand" "w")]
3176   "TARGET_SIMD"
3178   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3180                                                   operands[2], p));
3181   DONE;
;; ssubl2 builtin expander: hi-half selector + generic _hi_internal pattern.
3184 (define_expand "aarch64_ssubl2<mode>"
3185   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186    (match_operand:VQW 1 "register_operand" "w")
3187    (match_operand:VQW 2 "register_operand" "w")]
3188   "TARGET_SIMD"
3190   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3192                                                 operands[2], p));
3193   DONE;
;; usubl2 builtin expander: hi-half selector + generic _hi_internal pattern.
3196 (define_expand "aarch64_usubl2<mode>"
3197   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198    (match_operand:VQW 1 "register_operand" "w")
3199    (match_operand:VQW 2 "register_operand" "w")]
3200   "TARGET_SIMD"
3202   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3204                                                 operands[2], p));
3205   DONE;
;; Widening add/sub of whole 64-bit vectors ({s,u}{add,sub}l): both inputs
;; are extended to the double-width mode before the operation.
3208 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3209  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3210        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3211                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3212                        (ANY_EXTEND:<VWIDE>
3213                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3214   "TARGET_SIMD"
3215   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3216   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3219 ;; <su><addsub>w<q>.
;; Widening signed sum of a Q-register vector into a double-width
;; accumulator: saddw the low half (via the lo-half internal pattern),
;; then saddw2 the high half into the final result.
3221 (define_expand "widen_ssum<mode>3"
3222   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3223         (plus:<VDBLW> (sign_extend:<VDBLW> 
3224                         (match_operand:VQW 1 "register_operand" ""))
3225                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3226   "TARGET_SIMD"
3227   {
3228     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3229     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3231     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3232                                                 operands[1], p));
3233     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3234     DONE;
3235   }
;; Widening signed sum of a 64-bit vector: a single saddw suffices.
3238 (define_expand "widen_ssum<mode>3"
3239   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3240         (plus:<VWIDE> (sign_extend:<VWIDE>
3241                         (match_operand:VD_BHSI 1 "register_operand" ""))
3242                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3243   "TARGET_SIMD"
3245   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3246   DONE;
;; Widening unsigned sum of a Q-register vector: uaddw the low half, then
;; uaddw2 the high half (mirrors the signed widen_ssum expander above).
3249 (define_expand "widen_usum<mode>3"
3250   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3251         (plus:<VDBLW> (zero_extend:<VDBLW> 
3252                         (match_operand:VQW 1 "register_operand" ""))
3253                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3254   "TARGET_SIMD"
3255   {
3256     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3257     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3259     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3260                                                  operands[1], p));
3261     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3262     DONE;
3263   }
3266 (define_expand "widen_usum<mode>3"
3267   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3268         (plus:<VWIDE> (zero_extend:<VWIDE>
3269                         (match_operand:VD_BHSI 1 "register_operand" ""))
3270                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3271   "TARGET_SIMD"
3273   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3274   DONE;
;; <su><addsub>w: op0 = op1 (wide) +/- extend (op2), op2 a 64-bit vector.
3277 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3278   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3279         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3280                         (ANY_EXTEND:<VWIDE>
3281                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3282   "TARGET_SIMD"
3283   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3284   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; As above, but op2 is the low half of a 128-bit register, selected by the
;; vect_par_cnst_lo_half parallel in operand 3; prints the half-width name
;; (<Vhalftype>) for op2.
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3288   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3290                         (ANY_EXTEND:<VWIDE>
3291                           (vec_select:<VHALF>
3292                            (match_operand:VQW 2 "register_operand" "w")
3293                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3294   "TARGET_SIMD"
3295   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3296   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half variant: operand 3 is a vect_par_cnst_hi_half selector and the
;; <su><addsub>w2 form of the instruction is emitted.
3299 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3300   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3301         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3302                         (ANY_EXTEND:<VWIDE>
3303                           (vec_select:<VHALF>
3304                            (match_operand:VQW 2 "register_operand" "w")
3305                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3306   "TARGET_SIMD"
3307   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Builtin expanders for the high-half widening forms: build the hi-half
;; lane selector with aarch64_simd_vect_par_cnst_half (true = high) and
;; forward to the matching *w2<mode>_internal pattern above.
3311 (define_expand "aarch64_saddw2<mode>"
3312   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3313    (match_operand:<VWIDE> 1 "register_operand" "w")
3314    (match_operand:VQW 2 "register_operand" "w")]
3315   "TARGET_SIMD"
3317   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3318   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3319                                                 operands[2], p));
3320   DONE;
;; UADDW2 expander.
3323 (define_expand "aarch64_uaddw2<mode>"
3324   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3325    (match_operand:<VWIDE> 1 "register_operand" "w")
3326    (match_operand:VQW 2 "register_operand" "w")]
3327   "TARGET_SIMD"
3329   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3330   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3331                                                 operands[2], p));
3332   DONE;
;; SSUBW2 expander.
3336 (define_expand "aarch64_ssubw2<mode>"
3337   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3338    (match_operand:<VWIDE> 1 "register_operand" "w")
3339    (match_operand:VQW 2 "register_operand" "w")]
3340   "TARGET_SIMD"
3342   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3343   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3344                                                 operands[2], p));
3345   DONE;
;; USUBW2 expander.
3348 (define_expand "aarch64_usubw2<mode>"
3349   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3350    (match_operand:<VWIDE> 1 "register_operand" "w")
3351    (match_operand:VQW 2 "register_operand" "w")]
3352   "TARGET_SIMD"
3354   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3355   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3356                                                 operands[2], p));
3357   DONE;
3360 ;; <su><r>h<addsub>.
;; Halving add/sub: the <sur> and <addsub> attributes of the HADDSUB
;; iterator select the [su][r]h{add,sub} mnemonic.  Modelled as an unspec.
3362 (define_insn "aarch64_<sur>h<addsub><mode>"
3363   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3364         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3365                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3366                      HADDSUB))]
3367   "TARGET_SIMD"
3368   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3369   [(set_attr "type" "neon_<addsub>_halve<q>")]
3372 ;; <r><addsub>hn<q>.
;; Add/sub returning the narrowed high half ([r]{add,sub}hn); the result is
;; in the narrow mode <VNARROWQ>.
3374 (define_insn "aarch64_<sur><addsub>hn<mode>"
3375   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3376         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3377                             (match_operand:VQN 2 "register_operand" "w")]
3378                            ADDSUBHN))]
3379   "TARGET_SIMD"
3380   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3381   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; hn2: narrows into the upper half of op0; op1 supplies the existing low
;; half and is tied to op0 (constraint "0"), so it is not printed.
3384 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3385   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3386         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3387                              (match_operand:VQN 2 "register_operand" "w")
3388                              (match_operand:VQN 3 "register_operand" "w")]
3389                             ADDSUBHN2))]
3390   "TARGET_SIMD"
3391   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3392   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3395 ;; pmul.
;; Polynomial multiply, byte elements only (VB iterator).
3397 (define_insn "aarch64_pmul<mode>"
3398   [(set (match_operand:VB 0 "register_operand" "=w")
3399         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3400                     (match_operand:VB 2 "register_operand" "w")]
3401                    UNSPEC_PMUL))]
3402  "TARGET_SIMD"
3403  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3404   [(set_attr "type" "neon_mul_<Vetype><q>")]
3407 ;; fmulx.
;; FMULX over vector and scalar FP modes (VHSDF_HSDF covers both, so the
;; %<v>/<Vmtype> operand modifiers print either form).
3409 (define_insn "aarch64_fmulx<mode>"
3410   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3411         (unspec:VHSDF_HSDF
3412           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3413            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3414            UNSPEC_FMULX))]
3415  "TARGET_SIMD"
3416  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3417  [(set_attr "type" "neon_fp_mul_<stype>")]
3420 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by element where the lane register has the opposite width from the
;; destination (<VSWAP_WIDTH>); operand 3 is the lane number, adjusted for
;; big-endian lane numbering via aarch64_endian_lane_rtx.
3422 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3423   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3424         (unspec:VDQSF
3425          [(match_operand:VDQSF 1 "register_operand" "w")
3426           (vec_duplicate:VDQSF
3427            (vec_select:<VEL>
3428             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3429             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3430          UNSPEC_FMULX))]
3431   "TARGET_SIMD"
3432   {
3433     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3434     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3435   }
3436   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3439 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Same-width variant: the lane register is in the destination mode VDQF.
3441 (define_insn "*aarch64_mulx_elt<mode>"
3442   [(set (match_operand:VDQF 0 "register_operand" "=w")
3443         (unspec:VDQF
3444          [(match_operand:VDQF 1 "register_operand" "w")
3445           (vec_duplicate:VDQF
3446            (vec_select:<VEL>
3447             (match_operand:VDQF 2 "register_operand" "w")
3448             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3449          UNSPEC_FMULX))]
3450   "TARGET_SIMD"
3451   {
3452     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3453     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3454   }
3455   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3458 ;; vmulxq_lane
;; FMULX with a scalar operand broadcast to every lane; always prints
;; lane index [0] because op2 is a scalar register.
3460 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3461   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3462         (unspec:VHSDF
3463          [(match_operand:VHSDF 1 "register_operand" "w")
3464           (vec_duplicate:VHSDF
3465             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3466          UNSPEC_FMULX))]
3467   "TARGET_SIMD"
3468   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3469   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3472 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3473 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3474 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX where the second operand is one element extracted from a
;; vector register; result and op1 are scalar (<VEL>).
3476 (define_insn "*aarch64_vgetfmulx<mode>"
3477   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3478         (unspec:<VEL>
3479          [(match_operand:<VEL> 1 "register_operand" "w")
3480           (vec_select:<VEL>
3481            (match_operand:VDQF 2 "register_operand" "w")
3482             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3483          UNSPEC_FMULX))]
3484   "TARGET_SIMD"
3485   {
3486     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3487     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3488   }
3489   [(set_attr "type" "fmul<Vetype>")]
3491 ;; <su>q<addsub>
;; Saturating add/sub ([su]q{add,sub}) via the BINQOPS code iterator,
;; covering vector and scalar integer modes (VSDQ_I).
3493 (define_insn "aarch64_<su_optab><optab><mode>"
3494   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3495         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3496                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3497   "TARGET_SIMD"
3498   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499   [(set_attr "type" "neon_<optab><q>")]
3502 ;; suqadd and usqadd
;; Accumulating forms: op1 is tied to op0 ("0") and is not printed; the
;; instruction adds op2 into op0 in place.
3504 (define_insn "aarch64_<sur>qadd<mode>"
3505   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3506         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3507                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3508                        USSUQADD))]
3509   "TARGET_SIMD"
3510   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3511   [(set_attr "type" "neon_qadd<q>")]
3514 ;; sqmovun
;; SQXTUN: saturating extract-narrow to the unsigned narrow mode.
3516 (define_insn "aarch64_sqmovun<mode>"
3517   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3518         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3519                             UNSPEC_SQXTUN))]
3520    "TARGET_SIMD"
3521    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3522    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3525 ;; sqmovn and uqmovn
;; [su]qxtn: saturating extract-narrow, signedness chosen by SUQMOVN.
3527 (define_insn "aarch64_<sur>qmovn<mode>"
3528   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3529         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3530                             SUQMOVN))]
3531   "TARGET_SIMD"
3532   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3533    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3536 ;; <su>q<absneg>
;; Saturating unary ops (sqabs/sqneg) via the UNQOPS code iterator.
3538 (define_insn "aarch64_s<optab><mode>"
3539   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540         (UNQOPS:VSDQ_I
3541           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3542   "TARGET_SIMD"
3543   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3544   [(set_attr "type" "neon_<optab><q>")]
3547 ;; sq<r>dmulh.
;; Saturating doubling multiply high half, plain and rounding (<r> from the
;; VQDMULH iterator), over vector and scalar modes.
3549 (define_insn "aarch64_sq<r>dmulh<mode>"
3550   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3551         (unspec:VSDQ_HSI
3552           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3553            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3554          VQDMULH))]
3555   "TARGET_SIMD"
3556   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3557   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3560 ;; sq<r>dmulh_lane
;; By-lane form for vector modes; operand 3 (the lane number) is remapped
;; for big-endian with aarch64_endian_lane_rtx before printing.
3562 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3563   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3564         (unspec:VDQHS
3565           [(match_operand:VDQHS 1 "register_operand" "w")
3566            (vec_select:<VEL>
3567              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3568              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3569          VQDMULH))]
3570   "TARGET_SIMD"
3571   "*
3572    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3573    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3574   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; As the _lane pattern above, but the lane register is the full-width
;; <VCONQ> mode (laneq intrinsics).
3577 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3578   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3579         (unspec:VDQHS
3580           [(match_operand:VDQHS 1 "register_operand" "w")
3581            (vec_select:<VEL>
3582              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3583              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3584          VQDMULH))]
3585   "TARGET_SIMD"
3586   "*
3587    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3588    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3589   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (HI/SI) by-lane form; the lane register is in <VCOND> mode.
;; The lane suffix uses <Vetype> for consistency with the VDQHS patterns
;; above (for HI/SI the v and Vetype attributes print the same letter, so
;; the assembly output is unchanged).
3592 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3593   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3594         (unspec:SD_HSI
3595           [(match_operand:SD_HSI 1 "register_operand" "w")
3596            (vec_select:<VEL>
3597              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3598              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3599          VQDMULH))]
3600   "TARGET_SIMD"
3601   "*
3602    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3603    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3604   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (HI/SI) by-lane form with a full-width (<VCONQ>) lane register.
;; The lane suffix uses <Vetype> for consistency with the VDQHS patterns
;; above (for HI/SI the v and Vetype attributes print the same letter, so
;; the assembly output is unchanged).
3607 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3608   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3609         (unspec:SD_HSI
3610           [(match_operand:SD_HSI 1 "register_operand" "w")
3611            (vec_select:<VEL>
3612              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3613              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3614          VQDMULH))]
3615   "TARGET_SIMD"
3616   "*
3617    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3618    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3619   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3622 ;; sqrdml[as]h.
;; Saturating rounding doubling multiply accumulate/subtract high half;
;; only available with TARGET_SIMD_RDMA.  The accumulator (op1) is tied to
;; op0 and not printed.
3624 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3625   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3626         (unspec:VSDQ_HSI
3627           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3628            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3629            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3630           SQRDMLH_AS))]
3631    "TARGET_SIMD_RDMA"
3632    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3633    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3636 ;; sqrdml[as]h_lane.
;; Vector by-lane form; lane register in <VCOND> mode, lane number (op 4)
;; remapped for big-endian before printing.
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640         (unspec:VDQHS
3641           [(match_operand:VDQHS 1 "register_operand" "0")
3642            (match_operand:VDQHS 2 "register_operand" "w")
3643            (vec_select:<VEL>
3644              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646           SQRDMLH_AS))]
3647    "TARGET_SIMD_RDMA"
3648    {
3649      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650      return
3651       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3652    }
3653    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) sqrdml[as]h by-lane form; accumulator op1 tied to op0.
3656 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3657   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3658         (unspec:SD_HSI
3659           [(match_operand:SD_HSI 1 "register_operand" "0")
3660            (match_operand:SD_HSI 2 "register_operand" "w")
3661            (vec_select:<VEL>
3662              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3663              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3664           SQRDMLH_AS))]
3665    "TARGET_SIMD_RDMA"
3666    {
3667      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3668      return
3669       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3670    }
3671    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3674 ;; sqrdml[as]h_laneq.
;; Vector by-lane form with a full-width (<VCONQ>) lane register.
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3678         (unspec:VDQHS
3679           [(match_operand:VDQHS 1 "register_operand" "0")
3680            (match_operand:VDQHS 2 "register_operand" "w")
3681            (vec_select:<VEL>
3682              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3684           SQRDMLH_AS))]
3685    "TARGET_SIMD_RDMA"
3686    {
3687      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3688      return
3689       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3690    }
3691    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) sqrdml[as]h_laneq form with a <VCONQ> lane register.
;; The lane suffix uses <Vetype> for consistency with the _lane scalar
;; pattern above (for HI/SI the v and Vetype attributes print the same
;; letter, so the assembly output is unchanged).
3694 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3695   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3696         (unspec:SD_HSI
3697           [(match_operand:SD_HSI 1 "register_operand" "0")
3698            (match_operand:SD_HSI 2 "register_operand" "w")
3699            (vec_select:<VEL>
3700              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3701              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3702           SQRDMLH_AS))]
3703    "TARGET_SIMD_RDMA"
3704    {
3705      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3706      return
3707       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3708    }
3709    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3712 ;; vqdml[sa]l
;; Saturating doubling multiply-add/sub long: op0 = op1 +/- saturate
;; ((extend (op2) * extend (op3)) << 1).  The accumulator op1 is tied to
;; op0 ("0") and is not printed.
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3715   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716         (SBINQOPS:<VWIDE>
3717           (match_operand:<VWIDE> 1 "register_operand" "0")
3718           (ss_ashift:<VWIDE>
3719               (mult:<VWIDE>
3720                 (sign_extend:<VWIDE>
3721                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3722                 (sign_extend:<VWIDE>
3723                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3724               (const_int 1))))]
3725   "TARGET_SIMD"
3726   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3727   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3730 ;; vqdml[sa]l_lane
;; By-lane form: the second multiplicand is one lane of a <VCOND> register,
;; broadcast across the vector; lane number in op4, endian-adjusted.
3732 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3733   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3734         (SBINQOPS:<VWIDE>
3735           (match_operand:<VWIDE> 1 "register_operand" "0")
3736           (ss_ashift:<VWIDE>
3737             (mult:<VWIDE>
3738               (sign_extend:<VWIDE>
3739                 (match_operand:VD_HSI 2 "register_operand" "w"))
3740               (sign_extend:<VWIDE>
3741                 (vec_duplicate:VD_HSI
3742                   (vec_select:<VEL>
3743                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3744                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3745               ))
3746             (const_int 1))))]
3747   "TARGET_SIMD"
3748   {
3749     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3750     return
3751       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3752   }
3753   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As the _lane pattern above, but the lane register is the full-width
;; <VCONQ> mode (laneq intrinsics).
3756 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3757   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3758         (SBINQOPS:<VWIDE>
3759           (match_operand:<VWIDE> 1 "register_operand" "0")
3760           (ss_ashift:<VWIDE>
3761             (mult:<VWIDE>
3762               (sign_extend:<VWIDE>
3763                 (match_operand:VD_HSI 2 "register_operand" "w"))
3764               (sign_extend:<VWIDE>
3765                 (vec_duplicate:VD_HSI
3766                   (vec_select:<VEL>
3767                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3768                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3769               ))
3770             (const_int 1))))]
3771   "TARGET_SIMD"
3772   {
3773     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3774     return
3775       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3776   }
3777   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) sqdml[as]l by-lane: no vec_duplicate is needed because
;; the multiplicands are scalars.  Lane register in <VCOND> mode.
3780 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3781   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3782         (SBINQOPS:<VWIDE>
3783           (match_operand:<VWIDE> 1 "register_operand" "0")
3784           (ss_ashift:<VWIDE>
3785             (mult:<VWIDE>
3786               (sign_extend:<VWIDE>
3787                 (match_operand:SD_HSI 2 "register_operand" "w"))
3788               (sign_extend:<VWIDE>
3789                 (vec_select:<VEL>
3790                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3791                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3792               )
3793             (const_int 1))))]
3794   "TARGET_SIMD"
3795   {
3796     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3797     return
3798       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3799   }
3800   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq variant: lane register in the full-width <VCONQ> mode.
3803 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3804   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3805         (SBINQOPS:<VWIDE>
3806           (match_operand:<VWIDE> 1 "register_operand" "0")
3807           (ss_ashift:<VWIDE>
3808             (mult:<VWIDE>
3809               (sign_extend:<VWIDE>
3810                 (match_operand:SD_HSI 2 "register_operand" "w"))
3811               (sign_extend:<VWIDE>
3812                 (vec_select:<VEL>
3813                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3814                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3815               )
3816             (const_int 1))))]
3817   "TARGET_SIMD"
3818   {
3819     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3820     return
3821       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3822   }
3823   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3826 ;; vqdml[sa]l_n
;; By-scalar form: the second multiplicand is a scalar register broadcast
;; to every lane (vec_duplicate of op3), printed as lane [0].
3828 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3829   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3830         (SBINQOPS:<VWIDE>
3831           (match_operand:<VWIDE> 1 "register_operand" "0")
3832           (ss_ashift:<VWIDE>
3833               (mult:<VWIDE>
3834                 (sign_extend:<VWIDE>
3835                       (match_operand:VD_HSI 2 "register_operand" "w"))
3836                 (sign_extend:<VWIDE>
3837                   (vec_duplicate:VD_HSI
3838                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3839               (const_int 1))))]
3840   "TARGET_SIMD"
3841   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3842   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3845 ;; sqdml[as]l2
;; High-half form: both multiplicands are the upper halves of 128-bit
;; registers, selected by the shared vect_par_cnst_hi_half parallel (op4).
3847 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3848   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3849         (SBINQOPS:<VWIDE>
3850          (match_operand:<VWIDE> 1 "register_operand" "0")
3851          (ss_ashift:<VWIDE>
3852              (mult:<VWIDE>
3853                (sign_extend:<VWIDE>
3854                  (vec_select:<VHALF>
3855                      (match_operand:VQ_HSI 2 "register_operand" "w")
3856                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3857                (sign_extend:<VWIDE>
3858                  (vec_select:<VHALF>
3859                      (match_operand:VQ_HSI 3 "register_operand" "w")
3860                      (match_dup 4))))
3861              (const_int 1))))]
3862   "TARGET_SIMD"
3863   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3864   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin expanders for sqdmlal2/sqdmlsl2: build the hi-half selector and
;; forward to the *_internal pattern above.
3867 (define_expand "aarch64_sqdmlal2<mode>"
3868   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3869    (match_operand:<VWIDE> 1 "register_operand" "w")
3870    (match_operand:VQ_HSI 2 "register_operand" "w")
3871    (match_operand:VQ_HSI 3 "register_operand" "w")]
3872   "TARGET_SIMD"
3874   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3875   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3876                                                   operands[2], operands[3], p));
3877   DONE;
;; Subtracting counterpart (SQDMLSL2).
3880 (define_expand "aarch64_sqdmlsl2<mode>"
3881   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3882    (match_operand:<VWIDE> 1 "register_operand" "w")
3883    (match_operand:VQ_HSI 2 "register_operand" "w")
3884    (match_operand:VQ_HSI 3 "register_operand" "w")]
3885   "TARGET_SIMD"
3887   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3888   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3889                                                   operands[2], operands[3], p));
3890   DONE;
3893 ;; vqdml[sa]l2_lane
;; High-half by-lane form: op2's upper half (selected by the hi-half
;; parallel in op5) times one lane of op3 broadcast across the half vector;
;; lane number (op4) is endian-adjusted before printing.
3895 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3896   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3897         (SBINQOPS:<VWIDE>
3898           (match_operand:<VWIDE> 1 "register_operand" "0")
3899           (ss_ashift:<VWIDE>
3900               (mult:<VWIDE>
3901                 (sign_extend:<VWIDE>
3902                   (vec_select:<VHALF>
3903                     (match_operand:VQ_HSI 2 "register_operand" "w")
3904                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3905                 (sign_extend:<VWIDE>
3906                   (vec_duplicate:<VHALF>
3907                     (vec_select:<VEL>
3908                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3909                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3910                     ))))
3911               (const_int 1))))]
3912   "TARGET_SIMD"
3913   {
3914     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3915     return
3916      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3917   }
3918   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; laneq variant: the lane register is the full-width <VCONQ> mode.
3921 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3922   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923         (SBINQOPS:<VWIDE>
3924           (match_operand:<VWIDE> 1 "register_operand" "0")
3925           (ss_ashift:<VWIDE>
3926               (mult:<VWIDE>
3927                 (sign_extend:<VWIDE>
3928                   (vec_select:<VHALF>
3929                     (match_operand:VQ_HSI 2 "register_operand" "w")
3930                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3931                 (sign_extend:<VWIDE>
3932                   (vec_duplicate:<VHALF>
3933                     (vec_select:<VEL>
3934                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3935                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3936                     ))))
3937               (const_int 1))))]
3938   "TARGET_SIMD"
3939   {
3940     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3941     return
3942      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3943   }
3944   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin expanders for the four lane/laneq high-half forms: build the
;; hi-half selector and forward to the matching *_internal pattern above.
3947 (define_expand "aarch64_sqdmlal2_lane<mode>"
3948   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3949    (match_operand:<VWIDE> 1 "register_operand" "w")
3950    (match_operand:VQ_HSI 2 "register_operand" "w")
3951    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3952    (match_operand:SI 4 "immediate_operand" "i")]
3953   "TARGET_SIMD"
3955   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3956   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3957                                                        operands[2], operands[3],
3958                                                        operands[4], p));
3959   DONE;
;; SQDMLAL2 with a full-width (<VCONQ>) lane register.
3962 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3963   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964    (match_operand:<VWIDE> 1 "register_operand" "w")
3965    (match_operand:VQ_HSI 2 "register_operand" "w")
3966    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3967    (match_operand:SI 4 "immediate_operand" "i")]
3968   "TARGET_SIMD"
3970   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3971   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3972                                                        operands[2], operands[3],
3973                                                        operands[4], p));
3974   DONE;
;; Subtracting (SQDMLSL2) by-lane expander.
3977 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3978   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3979    (match_operand:<VWIDE> 1 "register_operand" "w")
3980    (match_operand:VQ_HSI 2 "register_operand" "w")
3981    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3982    (match_operand:SI 4 "immediate_operand" "i")]
3983   "TARGET_SIMD"
3985   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3986   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3987                                                        operands[2], operands[3],
3988                                                        operands[4], p));
3989   DONE;
;; SQDMLSL2 with a full-width (<VCONQ>) lane register.
3992 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3993   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994    (match_operand:<VWIDE> 1 "register_operand" "w")
3995    (match_operand:VQ_HSI 2 "register_operand" "w")
3996    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3997    (match_operand:SI 4 "immediate_operand" "i")]
3998   "TARGET_SIMD"
4000   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4002                                                        operands[2], operands[3],
4003                                                        operands[4], p));
4004   DONE;
;; High-half by-scalar form: op2's upper half times a scalar (op3)
;; broadcast across the half vector, printed as lane [0].
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4008   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009         (SBINQOPS:<VWIDE>
4010           (match_operand:<VWIDE> 1 "register_operand" "0")
4011           (ss_ashift:<VWIDE>
4012             (mult:<VWIDE>
4013               (sign_extend:<VWIDE>
4014                 (vec_select:<VHALF>
4015                   (match_operand:VQ_HSI 2 "register_operand" "w")
4016                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017               (sign_extend:<VWIDE>
4018                 (vec_duplicate:<VHALF>
4019                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4020             (const_int 1))))]
4021   "TARGET_SIMD"
4022   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4023   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin expanders for the _n high-half forms: build the hi-half
;; selector and forward to the *_internal pattern above.
4026 (define_expand "aarch64_sqdmlal2_n<mode>"
4027   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4028    (match_operand:<VWIDE> 1 "register_operand" "w")
4029    (match_operand:VQ_HSI 2 "register_operand" "w")
4030    (match_operand:<VEL> 3 "register_operand" "w")]
4031   "TARGET_SIMD"
4033   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4034   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4035                                                     operands[2], operands[3],
4036                                                     p));
4037   DONE;
;; Subtracting counterpart (SQDMLSL2 by scalar).
4040 (define_expand "aarch64_sqdmlsl2_n<mode>"
4041   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042    (match_operand:<VWIDE> 1 "register_operand" "w")
4043    (match_operand:VQ_HSI 2 "register_operand" "w")
4044    (match_operand:<VEL> 3 "register_operand" "w")]
4045   "TARGET_SIMD"
4047   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4049                                                     operands[2], operands[3],
4050                                                     p));
4051   DONE;
4054 ;; vqdmull
;; Signed saturating doubling multiply long:
;;   result = ss_ashift (sign_extend (a) * sign_extend (b), 1)
;; i.e. widen both inputs, multiply, then saturating-double.
4056 (define_insn "aarch64_sqdmull<mode>"
4057   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4058         (ss_ashift:<VWIDE>
4059              (mult:<VWIDE>
4060                (sign_extend:<VWIDE>
4061                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4062                (sign_extend:<VWIDE>
4063                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4064              (const_int 1)))]
4065   "TARGET_SIMD"
4066   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4067   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4070 ;; vqdmull_lane
;; Lane form for 64-bit vector inputs: the second multiplicand is one lane of
;; a 64-bit (<VCOND>) vector, broadcast across the half-width vector.
4072 (define_insn "aarch64_sqdmull_lane<mode>"
4073   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074         (ss_ashift:<VWIDE>
4075              (mult:<VWIDE>
4076                (sign_extend:<VWIDE>
4077                  (match_operand:VD_HSI 1 "register_operand" "w"))
4078                (sign_extend:<VWIDE>
4079                  (vec_duplicate:VD_HSI
4080                    (vec_select:<VEL>
4081                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4082                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4083                ))
4084              (const_int 1)))]
4085   "TARGET_SIMD"
4086   {
;; RTL lane numbers use GCC (array) order; convert for assembly output.
4087     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4088     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4089   }
4090   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above, but the lane comes from a 128-bit (<VCONQ>) vector ("laneq").
4093 (define_insn "aarch64_sqdmull_laneq<mode>"
4094   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4095         (ss_ashift:<VWIDE>
4096              (mult:<VWIDE>
4097                (sign_extend:<VWIDE>
4098                  (match_operand:VD_HSI 1 "register_operand" "w"))
4099                (sign_extend:<VWIDE>
4100                  (vec_duplicate:VD_HSI
4101                    (vec_select:<VEL>
4102                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4103                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4104                ))
4105              (const_int 1)))]
4106   "TARGET_SIMD"
4107   {
4108     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4109     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4110   }
4111   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane variant: no vec_duplicate, the selected lane is the
;; scalar multiplicand itself.
4114 (define_insn "aarch64_sqdmull_lane<mode>"
4115   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4116         (ss_ashift:<VWIDE>
4117              (mult:<VWIDE>
4118                (sign_extend:<VWIDE>
4119                  (match_operand:SD_HSI 1 "register_operand" "w"))
4120                (sign_extend:<VWIDE>
4121                  (vec_select:<VEL>
4122                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4123                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4124                ))
4125              (const_int 1)))]
4126   "TARGET_SIMD"
4127   {
4128     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4129     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4130   }
4131   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar laneq variant (lane from a 128-bit vector).
4134 (define_insn "aarch64_sqdmull_laneq<mode>"
4135   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4136         (ss_ashift:<VWIDE>
4137              (mult:<VWIDE>
4138                (sign_extend:<VWIDE>
4139                  (match_operand:SD_HSI 1 "register_operand" "w"))
4140                (sign_extend:<VWIDE>
4141                  (vec_select:<VEL>
4142                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4143                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4144                ))
4145              (const_int 1)))]
4146   "TARGET_SIMD"
4147   {
4148     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4149     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4150   }
4151   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4154 ;; vqdmull_n
;; Scalar-broadcast form: multiplier is a scalar register, duplicated across
;; the vector; emitted as lane 0 of the scalar's register.
4156 (define_insn "aarch64_sqdmull_n<mode>"
4157   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4158         (ss_ashift:<VWIDE>
4159              (mult:<VWIDE>
4160                (sign_extend:<VWIDE>
4161                  (match_operand:VD_HSI 1 "register_operand" "w"))
4162                (sign_extend:<VWIDE>
4163                  (vec_duplicate:VD_HSI
4164                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4165                )
4166              (const_int 1)))]
4167   "TARGET_SIMD"
4168   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4169   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4172 ;; vqdmull2
;; "2" (high-half) variants of sqdmull: both multiplicands are selected from
;; the high halves of 128-bit inputs via a vect_par_cnst_hi_half parallel.
4176 (define_insn "aarch64_sqdmull2<mode>_internal"
4177   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4178         (ss_ashift:<VWIDE>
4179              (mult:<VWIDE>
4180                (sign_extend:<VWIDE>
4181                  (vec_select:<VHALF>
4182                    (match_operand:VQ_HSI 1 "register_operand" "w")
4183                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4184                (sign_extend:<VWIDE>
4185                  (vec_select:<VHALF>
4186                    (match_operand:VQ_HSI 2 "register_operand" "w")
4187                    (match_dup 3)))
4188                )
4189              (const_int 1)))]
4190   "TARGET_SIMD"
4191   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4192   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; User-visible expander: supplies the hi-half selector parallel.
4195 (define_expand "aarch64_sqdmull2<mode>"
4196   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4197    (match_operand:VQ_HSI 1 "register_operand" "w")
4198    (match_operand:VQ_HSI 2 "register_operand" "w")]
4199   "TARGET_SIMD"
4201   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4202   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4203                                                   operands[2], p));
4204   DONE;
4207 ;; vqdmull2_lane
;; High-half multiply by a broadcast lane of a 64-bit (<VCOND>) vector.
4209 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4210   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211         (ss_ashift:<VWIDE>
4212              (mult:<VWIDE>
4213                (sign_extend:<VWIDE>
4214                  (vec_select:<VHALF>
4215                    (match_operand:VQ_HSI 1 "register_operand" "w")
4216                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4217                (sign_extend:<VWIDE>
4218                  (vec_duplicate:<VHALF>
4219                    (vec_select:<VEL>
4220                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4221                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4222                ))
4223              (const_int 1)))]
4224   "TARGET_SIMD"
4225   {
;; Convert the GCC-order lane number to architectural order for output.
4226     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4227     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4228   }
4229   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Same, but the lane comes from a 128-bit (<VCONQ>) vector ("laneq").
4232 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4233   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234         (ss_ashift:<VWIDE>
4235              (mult:<VWIDE>
4236                (sign_extend:<VWIDE>
4237                  (vec_select:<VHALF>
4238                    (match_operand:VQ_HSI 1 "register_operand" "w")
4239                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4240                (sign_extend:<VWIDE>
4241                  (vec_duplicate:<VHALF>
4242                    (vec_select:<VEL>
4243                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4244                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4245                ))
4246              (const_int 1)))]
4247   "TARGET_SIMD"
4248   {
4249     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4250     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4251   }
4252   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for the lane form.
4255 (define_expand "aarch64_sqdmull2_lane<mode>"
4256   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4257    (match_operand:VQ_HSI 1 "register_operand" "w")
4258    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4259    (match_operand:SI 3 "immediate_operand" "i")]
4260   "TARGET_SIMD"
4262   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4263   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4264                                                        operands[2], operands[3],
4265                                                        p));
4266   DONE;
;; Expander for the laneq form.
4269 (define_expand "aarch64_sqdmull2_laneq<mode>"
4270   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4271    (match_operand:VQ_HSI 1 "register_operand" "w")
4272    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4273    (match_operand:SI 3 "immediate_operand" "i")]
4274   "TARGET_SIMD"
4276   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4277   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4278                                                        operands[2], operands[3],
4279                                                        p));
4280   DONE;
4283 ;; vqdmull2_n
;; High-half multiply by a duplicated scalar (emitted as lane 0).
4285 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4286   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287         (ss_ashift:<VWIDE>
4288              (mult:<VWIDE>
4289                (sign_extend:<VWIDE>
4290                  (vec_select:<VHALF>
4291                    (match_operand:VQ_HSI 1 "register_operand" "w")
4292                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4293                (sign_extend:<VWIDE>
4294                  (vec_duplicate:<VHALF>
4295                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4296                )
4297              (const_int 1)))]
4298   "TARGET_SIMD"
4299   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4300   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for the _n form.
4303 (define_expand "aarch64_sqdmull2_n<mode>"
4304   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4305    (match_operand:VQ_HSI 1 "register_operand" "w")
4306    (match_operand:<VEL> 2 "register_operand" "w")]
4307   "TARGET_SIMD"
4309   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4310   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4311                                                     operands[2], p));
4312   DONE;
4315 ;; vshl
;; Register-controlled vector shifts, modelled as an unspec (VSHL iterator
;; covers the signed/unsigned/rounding variants via <sur>).
4317 (define_insn "aarch64_<sur>shl<mode>"
4318   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4319         (unspec:VSDQ_I_DI
4320           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4321            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4322          VSHL))]
4323   "TARGET_SIMD"
;; NOTE(review): trailing ';' after the template string — in md syntax ';'
;; starts a comment, so it is harmless; presumably accidental.
4324   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4325   [(set_attr "type" "neon_shift_reg<q>")]
4329 ;; vqshl
;; Saturating (optionally rounding) register-controlled shifts.
4331 (define_insn "aarch64_<sur>q<r>shl<mode>"
4332   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4333         (unspec:VSDQ_I
4334           [(match_operand:VSDQ_I 1 "register_operand" "w")
4335            (match_operand:VSDQ_I 2 "register_operand" "w")]
4336          VQSHL))]
4337   "TARGET_SIMD"
;; NOTE(review): same harmless trailing ';' as above.
4338   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4339   [(set_attr "type" "neon_sat_shift_reg<q>")]
4342 ;; vshll_n
;; Widening shift-left by immediate.  A shift equal to the element width has
;; no s/u distinction and is emitted as plain SHLL.
4344 (define_insn "aarch64_<sur>shll_n<mode>"
4345   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4347                          (match_operand:SI 2
4348                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4349                          VSHLL))]
4350   "TARGET_SIMD"
4351   {
4352     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4353       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4354     else
4355       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4356   }
4357   [(set_attr "type" "neon_shift_imm_long")]
4360 ;; vshll_high_n
;; High-half widening shift-left by immediate; same SHLL special case.
4362 (define_insn "aarch64_<sur>shll2_n<mode>"
4363   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4365                          (match_operand:SI 2 "immediate_operand" "i")]
4366                          VSHLL))]
4367   "TARGET_SIMD"
4368   {
4369     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4370       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4371     else
4372       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4373   }
4374   [(set_attr "type" "neon_shift_imm_long")]
4377 ;; vrshr_n
;; (Rounding) shift right by immediate; immediate range checked by predicate.
4379 (define_insn "aarch64_<sur>shr_n<mode>"
4380   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4381         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4382                            (match_operand:SI 2
4383                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4384                           VRSHR_N))]
4385   "TARGET_SIMD"
4386   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4387   [(set_attr "type" "neon_sat_shift_imm<q>")]
4390 ;; v(r)sra_n
;; Shift-right-and-accumulate: operand 1 is the accumulator, tied to the
;; output ("0"); only operand 2 is shifted.
4392 (define_insn "aarch64_<sur>sra_n<mode>"
4393   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4394         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4395                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4396                        (match_operand:SI 3
4397                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4398                       VSRA))]
4399   "TARGET_SIMD"
4400   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4401   [(set_attr "type" "neon_shift_acc<q>")]
4404 ;; vs<lr>i_n
;; Shift-and-insert (SLI/SRI): destination bits outside the shifted field are
;; preserved, hence the tied input operand 1.
4406 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4407   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4408         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4409                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4410                        (match_operand:SI 3
4411                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4412                       VSLRI))]
4413   "TARGET_SIMD"
4414   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4415   [(set_attr "type" "neon_shift_imm<q>")]
4418 ;; vqshl(u)
;; Saturating shift-left by immediate (signed, unsigned, or signed-to-unsigned).
4420 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4421   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4422         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4423                        (match_operand:SI 2
4424                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4425                       VQSHL_N))]
4426   "TARGET_SIMD"
4427   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4428   [(set_attr "type" "neon_sat_shift_imm<q>")]
4432 ;; vq(r)shr(u)n_n
;; Saturating (rounding) narrowing shift right by immediate.
4434 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4435   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4436         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4437                             (match_operand:SI 2
4438                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4439                            VQSHRN_N))]
4440   "TARGET_SIMD"
4441   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4442   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4446 ;; cm(eq|ge|gt|lt|le)
4447 ;; Note, we have constraints for Dz and Z as different expanders
4448 ;; have different ideas of what should be passed to this pattern.
;; Vector integer compares.  The comparison result is negated so that "true"
;; lanes become all-ones masks.  Alternative 2 compares against zero (#0).
4450 (define_insn "aarch64_cm<optab><mode>"
4451   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4452         (neg:<V_INT_EQUIV>
4453           (COMPARISONS:<V_INT_EQUIV>
4454             (match_operand:VDQ_I 1 "register_operand" "w,w")
4455             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4456           )))]
4457   "TARGET_SIMD"
4458   "@
4459   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4460   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4461   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI-mode compare: kept as "#" until after reload, then split.  If register
;; allocation put the operands in general registers, split into a CC compare
;; plus a negated cset; otherwise resolve to the *aarch64_cm<optab>di pattern
;; below, which has no CC clobber.
4464 (define_insn_and_split "aarch64_cm<optab>di"
4465   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4466         (neg:DI
4467           (COMPARISONS:DI
4468             (match_operand:DI 1 "register_operand" "w,w,r")
4469             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4470           )))
4471      (clobber (reg:CC CC_REGNUM))]
4472   "TARGET_SIMD"
4473   "#"
4474   "&& reload_completed"
4475   [(set (match_operand:DI 0 "register_operand")
4476         (neg:DI
4477           (COMPARISONS:DI
4478             (match_operand:DI 1 "register_operand")
4479             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4480           )))]
4481   {
4482     /* If we are in the general purpose register file,
4483        we split to a sequence of comparison and store.  */
4484     if (GP_REGNUM_P (REGNO (operands[0]))
4485         && GP_REGNUM_P (REGNO (operands[1])))
4486       {
4487         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4488         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4489         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4490         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4491         DONE;
4492       }
4493     /* Otherwise, we expand to a similar pattern which does not
4494        clobber CC_REGNUM.  */
4495   }
4496   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload FP-register form of the DI compare (no CC clobber).
4499 (define_insn "*aarch64_cm<optab>di"
4500   [(set (match_operand:DI 0 "register_operand" "=w,w")
4501         (neg:DI
4502           (COMPARISONS:DI
4503             (match_operand:DI 1 "register_operand" "w,w")
4504             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4505           )))]
4506   "TARGET_SIMD && reload_completed"
4507   "@
4508   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4509   cm<optab>\t%d0, %d1, #0"
4510   [(set_attr "type" "neon_compare, neon_compare_zero")]
4513 ;; cm(hs|hi)
;; Unsigned vector compares (higher-or-same / higher); same negate-to-mask
;; scheme as the signed patterns above, but no compare-against-zero form.
4515 (define_insn "aarch64_cm<optab><mode>"
4516   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4517         (neg:<V_INT_EQUIV>
4518           (UCOMPARISONS:<V_INT_EQUIV>
4519             (match_operand:VDQ_I 1 "register_operand" "w")
4520             (match_operand:VDQ_I 2 "register_operand" "w")
4521           )))]
4522   "TARGET_SIMD"
4523   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4524   [(set_attr "type" "neon_compare<q>")]
;; DI-mode unsigned compare; split after reload.  GP-register allocation
;; becomes a CC compare plus negated cset, otherwise the CC-free pattern
;; below is used.
4527 (define_insn_and_split "aarch64_cm<optab>di"
4528   [(set (match_operand:DI 0 "register_operand" "=w,r")
4529         (neg:DI
4530           (UCOMPARISONS:DI
4531             (match_operand:DI 1 "register_operand" "w,r")
4532             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4533           )))
4534     (clobber (reg:CC CC_REGNUM))]
4535   "TARGET_SIMD"
4536   "#"
4537   "&& reload_completed"
4538   [(set (match_operand:DI 0 "register_operand")
4539         (neg:DI
4540           (UCOMPARISONS:DI
4541             (match_operand:DI 1 "register_operand")
4542             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4543           )))]
4544   {
4545     /* If we are in the general purpose register file,
4546        we split to a sequence of comparison and store.  */
4547     if (GP_REGNUM_P (REGNO (operands[0]))
4548         && GP_REGNUM_P (REGNO (operands[1])))
4549       {
;; Unsigned compares use plain CCmode (cf. SELECT_CC_MODE in the signed split).
4550         machine_mode mode = CCmode;
4551         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4552         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4553         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4554         DONE;
4555       }
4556     /* Otherwise, we expand to a similar pattern which does not
4557        clobber CC_REGNUM.  */
4558   }
4559   [(set_attr "type" "neon_compare,multiple")]
;; Post-reload FP-register form (no CC clobber).
4562 (define_insn "*aarch64_cm<optab>di"
4563   [(set (match_operand:DI 0 "register_operand" "=w")
4564         (neg:DI
4565           (UCOMPARISONS:DI
4566             (match_operand:DI 1 "register_operand" "w")
4567             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4568           )))]
4569   "TARGET_SIMD && reload_completed"
4570   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4571   [(set_attr "type" "neon_compare")]
4574 ;; cmtst
4576 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4577 ;; we don't have any insns using ne, and aarch64_vcond outputs
4578 ;; not (neg (eq (and x y) 0))
4579 ;; which is rewritten by simplify_rtx as
4580 ;; plus (eq (and x y) 0) -1.
;; Vector bit-test: matches the canonicalised plus/eq form described above.
4582 (define_insn "aarch64_cmtst<mode>"
4583   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4584         (plus:<V_INT_EQUIV>
4585           (eq:<V_INT_EQUIV>
4586             (and:VDQ_I
4587               (match_operand:VDQ_I 1 "register_operand" "w")
4588               (match_operand:VDQ_I 2 "register_operand" "w"))
4589             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4590           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4591   ]
4592   "TARGET_SIMD"
4593   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4594   [(set_attr "type" "neon_tst<q>")]
;; DI-mode bit-test; split after reload like the cm<optab>di patterns:
;; GP registers -> TST-style CC compare + negated cset, FP registers -> the
;; CC-free *aarch64_cmtstdi pattern below.
4597 (define_insn_and_split "aarch64_cmtstdi"
4598   [(set (match_operand:DI 0 "register_operand" "=w,r")
4599         (neg:DI
4600           (ne:DI
4601             (and:DI
4602               (match_operand:DI 1 "register_operand" "w,r")
4603               (match_operand:DI 2 "register_operand" "w,r"))
4604             (const_int 0))))
4605     (clobber (reg:CC CC_REGNUM))]
4606   "TARGET_SIMD"
4607   "#"
4608   "&& reload_completed"
4609   [(set (match_operand:DI 0 "register_operand")
4610         (neg:DI
4611           (ne:DI
4612             (and:DI
4613               (match_operand:DI 1 "register_operand")
4614               (match_operand:DI 2 "register_operand"))
4615             (const_int 0))))]
4616   {
4617     /* If we are in the general purpose register file,
4618        we split to a sequence of comparison and store.  */
4619     if (GP_REGNUM_P (REGNO (operands[0]))
4620         && GP_REGNUM_P (REGNO (operands[1])))
4621       {
4622         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4623         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4624         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4625         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4626         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4627         DONE;
4628       }
4629     /* Otherwise, we expand to a similar pattern which does not
4630        clobber CC_REGNUM.  */
4631   }
4632   [(set_attr "type" "neon_tst,multiple")]
;; FP-register DI bit-test (no CC clobber).
4635 (define_insn "*aarch64_cmtstdi"
4636   [(set (match_operand:DI 0 "register_operand" "=w")
4637         (neg:DI
4638           (ne:DI
4639             (and:DI
4640               (match_operand:DI 1 "register_operand" "w")
4641               (match_operand:DI 2 "register_operand" "w"))
4642             (const_int 0))))]
4643   "TARGET_SIMD"
4644   "cmtst\t%d0, %d1, %d2"
4645   [(set_attr "type" "neon_tst")]
4648 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point vector/scalar compares, negated to all-ones masks.
;; Alternative 2 ("YDz") compares against floating-point zero.
4650 (define_insn "aarch64_cm<optab><mode>"
4651   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4652         (neg:<V_INT_EQUIV>
4653           (COMPARISONS:<V_INT_EQUIV>
4654             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4655             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4656           )))]
4657   "TARGET_SIMD"
4658   "@
4659   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4660   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4661   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4664 ;; fac(ge|gt)
4665 ;; Note we can also handle what would be fac(le|lt) by
4666 ;; generating fac(ge|gt).
;; Absolute-value FP compares: |op1| cmp |op2|, negated to a mask.
4668 (define_insn "aarch64_fac<optab><mode>"
4669   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4670         (neg:<V_INT_EQUIV>
4671           (FAC_COMPARISONS:<V_INT_EQUIV>
4672             (abs:VHSDF_HSDF
4673               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4674             (abs:VHSDF_HSDF
4675               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4676   )))]
4677   "TARGET_SIMD"
4678   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4679   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4682 ;; addp
;; Pairwise integer add on 64-bit vectors (unspec-modelled).
4684 (define_insn "aarch64_addp<mode>"
4685   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4686         (unspec:VD_BHSI
4687           [(match_operand:VD_BHSI 1 "register_operand" "w")
4688            (match_operand:VD_BHSI 2 "register_operand" "w")]
4689           UNSPEC_ADDP))]
4690   "TARGET_SIMD"
4691   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4692   [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar pairwise add: sums the two elements of a V2DI into a DI result.
4695 (define_insn "aarch64_addpdi"
4696   [(set (match_operand:DI 0 "register_operand" "=w")
4697         (unspec:DI
4698           [(match_operand:V2DI 1 "register_operand" "w")]
4699           UNSPEC_ADDP))]
4700   "TARGET_SIMD"
4701   "addp\t%d0, %1.2d"
4702   [(set_attr "type" "neon_reduc_add")]
4705 ;; sqrt
;; Expander: may emit a Newton-style approximation sequence via
;; aarch64_emit_approx_sqrt; if that declines, falls through to the
;; *sqrt<mode>2 insn below.  NOTE(review): closing "})" not visible in this
;; scraped view.
4707 (define_expand "sqrt<mode>2"
4708   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4709         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4710   "TARGET_SIMD"
4712   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4713     DONE;
;; Hardware FSQRT instruction.
4716 (define_insn "*sqrt<mode>2"
4717   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4718         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4719   "TARGET_SIMD"
4720   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4721   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4724 ;; Patterns for vector struct loads and stores.
;; LD2: load a 2-register structure (OImode = 2 x 128-bit) from memory.
4726 (define_insn "aarch64_simd_ld2<mode>"
4727   [(set (match_operand:OI 0 "register_operand" "=w")
4728         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4729                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4730                    UNSPEC_LD2))]
4731   "TARGET_SIMD"
4732   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4733   [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one 2-element structure and replicate to all lanes.
4736 (define_insn "aarch64_simd_ld2r<mode>"
4737   [(set (match_operand:OI 0 "register_operand" "=w")
4738        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4739                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4740                   UNSPEC_LD2_DUP))]
4741   "TARGET_SIMD"
4742   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4743   [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 single-lane: load one structure into lane 3 of the tied register pair.
4746 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4747   [(set (match_operand:OI 0 "register_operand" "=w")
4748         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4749                     (match_operand:OI 2 "register_operand" "0")
4750                     (match_operand:SI 3 "immediate_operand" "i")
4751                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4752                    UNSPEC_LD2_LANE))]
4753   "TARGET_SIMD"
4754   {
;; Convert GCC lane numbering to architectural numbering for assembly.
4755     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4756     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4757   }
4758   [(set_attr "type" "neon_load2_one_lane")]
;; Generic vec_load_lanes expander: on big-endian, load then permute the
;; register list with a reverse mask so RTL lane order matches GCC's model.
4761 (define_expand "vec_load_lanesoi<mode>"
4762   [(set (match_operand:OI 0 "register_operand" "=w")
4763         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4764                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765                    UNSPEC_LD2))]
4766   "TARGET_SIMD"
4768   if (BYTES_BIG_ENDIAN)
4769     {
4770       rtx tmp = gen_reg_rtx (OImode);
4771       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4772       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4773       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4774     }
4775   else
4776     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4777   DONE;
;; ST2: store a 2-register structure to memory.
4780 (define_insn "aarch64_simd_st2<mode>"
4781   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4782         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4783                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4784                    UNSPEC_ST2))]
4785   "TARGET_SIMD"
4786   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4787   [(set_attr "type" "neon_store2_2reg<q>")]
4790 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4791 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4792   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4793         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4794                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4795                     (match_operand:SI 2 "immediate_operand" "i")]
4796                    UNSPEC_ST2_LANE))]
4797   "TARGET_SIMD"
4798   {
4799     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4800     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4801   }
4802   [(set_attr "type" "neon_store2_one_lane<q>")]
;; Generic vec_store_lanes expander: big-endian permutes before storing
;; (mirror of the load expander above).
4805 (define_expand "vec_store_lanesoi<mode>"
4806   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4807         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4808                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4809                    UNSPEC_ST2))]
4810   "TARGET_SIMD"
4812   if (BYTES_BIG_ENDIAN)
4813     {
4814       rtx tmp = gen_reg_rtx (OImode);
4815       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4816       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4817       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4818     }
4819   else
4820     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4821   DONE;
4824 (define_insn "aarch64_simd_ld3<mode>"
4825   [(set (match_operand:CI 0 "register_operand" "=w")
4826         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4827                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4828                    UNSPEC_LD3))]
4829   "TARGET_SIMD"
4830   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4831   [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one 3-element structure and replicate it to all lanes of the
;; three destination registers.  The memory operand is BLK-mode; its size
;; is set by the aarch64_ld<nregs>r expander.
4834 (define_insn "aarch64_simd_ld3r<mode>"
4835   [(set (match_operand:CI 0 "register_operand" "=w")
4836        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4837                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4838                   UNSPEC_LD3_DUP))]
4839   "TARGET_SIMD"
4840   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4841   [(set_attr "type" "neon_load3_all_lanes<q>")]
;; Load one lane of a 3-register (CI) tuple with LD3; operand 2 ("0") ties
;; the untouched lanes to the destination.  The lane index (operand 3) is
;; endian-adjusted before printing.
4844 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4845   [(set (match_operand:CI 0 "register_operand" "=w")
4846         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4847                     (match_operand:CI 2 "register_operand" "0")
4848                     (match_operand:SI 3 "immediate_operand" "i")
4849                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4850                    UNSPEC_LD3_LANE))]
4851   "TARGET_SIMD"
4853     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4854     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4856   [(set_attr "type" "neon_load3_one_lane")]
;; Standard vec_load_lanes expander (3 x VQ).  On big-endian the loaded
;; register list is reversed afterwards through a TBL mask.
4859 (define_expand "vec_load_lanesci<mode>"
4860   [(set (match_operand:CI 0 "register_operand" "=w")
4861         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4862                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4863                    UNSPEC_LD3))]
4864   "TARGET_SIMD"
4866   if (BYTES_BIG_ENDIAN)
4867     {
4868       rtx tmp = gen_reg_rtx (CImode);
4869       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4870       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4871       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4872     }
4873   else
4874     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4875   DONE;
;; Interleaved store of a 3-register (CI) tuple to memory: ST3.
4878 (define_insn "aarch64_simd_st3<mode>"
4879   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4880         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4881                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4882                    UNSPEC_ST3))]
4883   "TARGET_SIMD"
4884   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4885   [(set_attr "type" "neon_store3_3reg<q>")]
4888 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store one lane from a three-register (CI) tuple with ST3; the lane
;; index in operand 2 is endian-adjusted before printing.
4889 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4890   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4891         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4892                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4893                      (match_operand:SI 2 "immediate_operand" "i")]
4894                     UNSPEC_ST3_LANE))]
4895   "TARGET_SIMD"
4896   {
4897     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4898     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4899   }
4900   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard vec_store_lanes expander (3 x VQ); mirrors vec_store_lanesoi,
;; reversing the register list first on big-endian targets.
4903 (define_expand "vec_store_lanesci<mode>"
4904   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4905         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4906                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4907                    UNSPEC_ST3))]
4908   "TARGET_SIMD"
4910   if (BYTES_BIG_ENDIAN)
4911     {
4912       rtx tmp = gen_reg_rtx (CImode);
4913       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4914       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4915       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4916     }
4917   else
4918     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4919   DONE;
;; Interleaved load of a 4-register (XI) tuple from memory: LD4.
4922 (define_insn "aarch64_simd_ld4<mode>"
4923   [(set (match_operand:XI 0 "register_operand" "=w")
4924         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4925                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926                    UNSPEC_LD4))]
4927   "TARGET_SIMD"
4928   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4929   [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure and replicate it to all lanes of the
;; four destination registers.
4932 (define_insn "aarch64_simd_ld4r<mode>"
4933   [(set (match_operand:XI 0 "register_operand" "=w")
4934        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4935                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4936                   UNSPEC_LD4_DUP))]
4937   "TARGET_SIMD"
4938   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4939   [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Load one lane of a 4-register (XI) tuple with LD4; operand 2 ("0") ties
;; the untouched lanes to the destination, and operand 3 (the lane index)
;; is endian-adjusted before printing.
4942 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4943   [(set (match_operand:XI 0 "register_operand" "=w")
4944         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945                     (match_operand:XI 2 "register_operand" "0")
4946                     (match_operand:SI 3 "immediate_operand" "i")
4947                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4948                    UNSPEC_LD4_LANE))]
4949   "TARGET_SIMD"
4951     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4952     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4954   [(set_attr "type" "neon_load4_one_lane")]
;; Standard vec_load_lanes expander (4 x VQ); reverses the loaded register
;; list afterwards on big-endian targets.
4957 (define_expand "vec_load_lanesxi<mode>"
4958   [(set (match_operand:XI 0 "register_operand" "=w")
4959         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4960                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4961                    UNSPEC_LD4))]
4962   "TARGET_SIMD"
4964   if (BYTES_BIG_ENDIAN)
4965     {
4966       rtx tmp = gen_reg_rtx (XImode);
4967       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4968       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4969       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4970     }
4971   else
4972     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4973   DONE;
;; Interleaved store of a 4-register (XI) tuple to memory: ST4.
4976 (define_insn "aarch64_simd_st4<mode>"
4977   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4978         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4979                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980                    UNSPEC_ST4))]
4981   "TARGET_SIMD"
4982   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4983   [(set_attr "type" "neon_store4_4reg<q>")]
4986 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store one lane from a four-register (XI) tuple with ST4; the lane index
;; in operand 2 is endian-adjusted before printing.
4987 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4988   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4989         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4990                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4991                      (match_operand:SI 2 "immediate_operand" "i")]
4992                     UNSPEC_ST4_LANE))]
4993   "TARGET_SIMD"
4994   {
4995     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4996     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4997   }
4998   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard vec_store_lanes expander (4 x VQ); reverses the register list
;; first on big-endian targets, then emits the st4.
5001 (define_expand "vec_store_lanesxi<mode>"
5002   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5003         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5004                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5005                    UNSPEC_ST4))]
5006   "TARGET_SIMD"
5008   if (BYTES_BIG_ENDIAN)
5009     {
5010       rtx tmp = gen_reg_rtx (XImode);
5011       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5012       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5013       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5014     }
5015   else
5016     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5017   DONE;
;; Reverse the lanes of every vector in a structure register list by
;; applying TBL with a byte-permute mask (operand 2) to each 128-bit
;; register in turn.  Kept as a single insn until after reload, then split
;; into one aarch64_tbl1v16qi per constituent register.  The destination
;; is earlyclobber ("=&w") since it is written before all inputs are read.
5020 (define_insn_and_split "aarch64_rev_reglist<mode>"
5021 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5022         (unspec:VSTRUCT
5023                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5024                     (match_operand:V16QI 2 "register_operand" "w")]
5025                    UNSPEC_REV_REGLIST))]
5026   "TARGET_SIMD"
5027   "#"
5028   "&& reload_completed"
5029   [(const_int 0)]
5031   int i;
5032   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5033   for (i = 0; i < nregs; i++)
5034     {
5035       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5036       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5037       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5038     }
5039   DONE;
5041   [(set_attr "type" "neon_tbl1_q")
5042    (set_attr "length" "<insn_count>")]
5045 ;; Reload patterns for AdvSIMD register list operands.
;; Generic move expander for structure (VSTRUCT) modes: before reload,
;; force the source into a register unless the destination is one, so at
;; most one operand is a memory reference.
5047 (define_expand "mov<mode>"
5048   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5049         (match_operand:VSTRUCT 1 "general_operand" ""))]
5050   "TARGET_SIMD"
5052   if (can_create_pseudo_p ())
5053     {
5054       if (GET_CODE (operands[0]) != REG)
5055         operands[1] = force_reg (<MODE>mode, operands[1]);
5056     }
;; Little-endian structure-mode moves: reg-reg moves are split later
;; (alternative 0, "#"); memory transfers use a single ST1/LD1 over the
;; whole register list.
5059 (define_insn "*aarch64_mov<mode>"
5060   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5061         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5062   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5063    && (register_operand (operands[0], <MODE>mode)
5064        || register_operand (operands[1], <MODE>mode))"
5065   "@
5066    #
5067    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5068    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5069   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5070                      neon_load<nregs>_<nregs>reg_q")
5071    (set_attr "length" "<insn_count>,4,4")]
;; Element-ordered LD1 used on big-endian targets, where a plain vector
;; load would give byte-swapped lane numbering.
5074 (define_insn "aarch64_be_ld1<mode>"
5075   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5076         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5077                              "aarch64_simd_struct_operand" "Utv")]
5078         UNSPEC_LD1))]
5079   "TARGET_SIMD"
5080   "ld1\\t{%0<Vmtype>}, %1"
5081   [(set_attr "type" "neon_load1_1reg<q>")]
;; Element-ordered ST1, the store counterpart of aarch64_be_ld1.
5084 (define_insn "aarch64_be_st1<mode>"
5085   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5086         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5087         UNSPEC_ST1))]
5088   "TARGET_SIMD"
5089   "st1\\t{%1<Vmtype>}, %0"
5090   [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OI (2 x 128-bit) moves: reg-reg is split ("#"); memory
;; transfers use STP/LDP of Q registers, preserving register-lane layout.
5093 (define_insn "*aarch64_be_movoi"
5094   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5095         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5096   "TARGET_SIMD && BYTES_BIG_ENDIAN
5097    && (register_operand (operands[0], OImode)
5098        || register_operand (operands[1], OImode))"
5099   "@
5100    #
5101    stp\\t%q1, %R1, %0
5102    ldp\\t%q0, %R0, %1"
5103   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5104    (set_attr "length" "8,4,4")]
;; Big-endian CI (3 x 128-bit) moves: always emitted as "#" and handled by
;; the CI define_split below; "o" requires an offsettable memory operand so
;; the split can address the pieces.
5107 (define_insn "*aarch64_be_movci"
5108   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5109         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5110   "TARGET_SIMD && BYTES_BIG_ENDIAN
5111    && (register_operand (operands[0], CImode)
5112        || register_operand (operands[1], CImode))"
5113   "#"
5114   [(set_attr "type" "multiple")
5115    (set_attr "length" "12,4,4")]
;; Big-endian XI (4 x 128-bit) moves: always split; see the XI define_split
;; below.
5118 (define_insn "*aarch64_be_movxi"
5119   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5120         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5121   "TARGET_SIMD && BYTES_BIG_ENDIAN
5122    && (register_operand (operands[0], XImode)
5123        || register_operand (operands[1], XImode))"
5124   "#"
5125   [(set_attr "type" "multiple")
5126    (set_attr "length" "16,4,4")]
;; After reload, decompose an OI reg-reg move into two TImode register
;; moves (aarch64_simd_emit_reg_reg_move orders them to handle overlap).
5129 (define_split
5130   [(set (match_operand:OI 0 "register_operand")
5131         (match_operand:OI 1 "register_operand"))]
5132   "TARGET_SIMD && reload_completed"
5133   [(const_int 0)]
5135   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5136   DONE;
;; After reload, decompose a CI move.  Reg-reg becomes three TImode moves;
;; on big-endian, reg<->mem becomes an OImode move of the first 32 bytes
;; plus a V16QI move of the final 16 bytes (via a TImode subreg, since
;; taking a vector-mode subreg directly would renumber lanes).  Otherwise
;; FAIL and let the generic movci handling apply.
5139 (define_split
5140   [(set (match_operand:CI 0 "nonimmediate_operand")
5141         (match_operand:CI 1 "general_operand"))]
5142   "TARGET_SIMD && reload_completed"
5143   [(const_int 0)]
5145   if (register_operand (operands[0], CImode)
5146       && register_operand (operands[1], CImode))
5147     {
5148       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5149       DONE;
5150     }
5151   else if (BYTES_BIG_ENDIAN)
5152     {
5153       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5154                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5155       emit_move_insn (gen_lowpart (V16QImode,
5156                                    simplify_gen_subreg (TImode, operands[0],
5157                                                         CImode, 32)),
5158                       gen_lowpart (V16QImode,
5159                                    simplify_gen_subreg (TImode, operands[1],
5160                                                         CImode, 32)));
5161       DONE;
5162     }
5163   else
5164     FAIL;
;; After reload, decompose an XI move.  Reg-reg becomes four TImode moves;
;; on big-endian, reg<->mem becomes two OImode moves (bytes 0-31 and
;; 32-63).  Otherwise FAIL and let generic handling apply.
5167 (define_split
5168   [(set (match_operand:XI 0 "nonimmediate_operand")
5169         (match_operand:XI 1 "general_operand"))]
5170   "TARGET_SIMD && reload_completed"
5171   [(const_int 0)]
5173   if (register_operand (operands[0], XImode)
5174       && register_operand (operands[1], XImode))
5175     {
5176       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5177       DONE;
5178     }
5179   else if (BYTES_BIG_ENDIAN)
5180     {
5181       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5182                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5183       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5184                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5185       DONE;
5186     }
5187   else
5188     FAIL;
;; Builtin expander for vldN_dup: wrap the pointer in a BLK MEM sized to
;; nregs elements of the element mode, then emit the matching ldNr insn.
5191 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5192   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5193    (match_operand:DI 1 "register_operand" "w")
5194    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5195   "TARGET_SIMD"
5197   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5198   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5199                      * <VSTRUCT:nregs>);
5201   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5202                                                                 mem));
5203   DONE;
;; LD2 into D registers for 64-bit vector modes (VD).
5206 (define_insn "aarch64_ld2<mode>_dreg"
5207   [(set (match_operand:OI 0 "register_operand" "=w")
5208         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5209                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5210                    UNSPEC_LD2_DREG))]
5211   "TARGET_SIMD"
5212   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5213   [(set_attr "type" "neon_load2_2reg<q>")]
5216 (define_insn "aarch64_ld2<mode>_dreg"
5217   [(set (match_operand:OI 0 "register_operand" "=w")
5218         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
;; Single-element (DX: DI/DF) variant: no interleaving possible, so LD1 of
;; two 1d registers is used instead of LD2.
5219                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5220                    UNSPEC_LD2_DREG))]
5221   "TARGET_SIMD"
5222   "ld1\\t{%S0.1d - %T0.1d}, %1"
5223   [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 into D registers for 64-bit vector modes (VD).
5226 (define_insn "aarch64_ld3<mode>_dreg"
5227   [(set (match_operand:CI 0 "register_operand" "=w")
5228         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5229                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5230                    UNSPEC_LD3_DREG))]
5231   "TARGET_SIMD"
5232   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5233   [(set_attr "type" "neon_load3_3reg<q>")]
;; Single-element (DX) variant uses LD1 of three 1d registers.
5236 (define_insn "aarch64_ld3<mode>_dreg"
5237   [(set (match_operand:CI 0 "register_operand" "=w")
5238         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5239                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5240                    UNSPEC_LD3_DREG))]
5241   "TARGET_SIMD"
5242   "ld1\\t{%S0.1d - %U0.1d}, %1"
5243   [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 into D registers for 64-bit vector modes (VD).
5246 (define_insn "aarch64_ld4<mode>_dreg"
5247   [(set (match_operand:XI 0 "register_operand" "=w")
5248         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5249                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5250                    UNSPEC_LD4_DREG))]
5251   "TARGET_SIMD"
5252   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5253   [(set_attr "type" "neon_load4_4reg<q>")]
;; Single-element (DX) variant uses LD1 of four 1d registers.
5256 (define_insn "aarch64_ld4<mode>_dreg"
5257   [(set (match_operand:XI 0 "register_operand" "=w")
5258         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5259                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5260                    UNSPEC_LD4_DREG))]
5261   "TARGET_SIMD"
5262   "ld1\\t{%S0.1d - %V0.1d}, %1"
5263   [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for vldN on 64-bit vectors: BLK MEM of nregs * 8 bytes,
;; delegated to the corresponding _dreg insn.
5266 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5267  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5268   (match_operand:DI 1 "register_operand" "r")
5269   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5270   "TARGET_SIMD"
5272   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5273   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5275   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5276   DONE;
;; Builtin expander for vld1: a plain vector move on little-endian, but the
;; element-ordered aarch64_be_ld1 on big-endian.
5279 (define_expand "aarch64_ld1<VALL_F16:mode>"
5280  [(match_operand:VALL_F16 0 "register_operand")
5281   (match_operand:DI 1 "register_operand")]
5282   "TARGET_SIMD"
5284   machine_mode mode = <VALL_F16:MODE>mode;
5285   rtx mem = gen_rtx_MEM (mode, operands[1]);
5287   if (BYTES_BIG_ENDIAN)
5288     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5289   else
5290     emit_move_insn (operands[0], mem);
5291   DONE;
;; Builtin expander for vldN on 128-bit vectors: MEM in the structure mode,
;; delegated to aarch64_simd_ldN.
5294 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5295  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5296   (match_operand:DI 1 "register_operand" "r")
5297   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5298   "TARGET_SIMD"
5300   machine_mode mode = <VSTRUCT:MODE>mode;
5301   rtx mem = gen_rtx_MEM (mode, operands[1]);
5303   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5304   DONE;
;; Builtin expander for vld1_x2 on 128-bit vectors (two consecutive
;; registers loaded with one LD1).
5307 (define_expand "aarch64_ld1x2<VQ:mode>"
5308  [(match_operand:OI 0 "register_operand" "=w")
5309   (match_operand:DI 1 "register_operand" "r")
5310   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5311   "TARGET_SIMD"
5313   machine_mode mode = OImode;
5314   rtx mem = gen_rtx_MEM (mode, operands[1]);
5316   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5317   DONE;
;; Builtin expander for vld1_x2 on 64-bit vectors.
5320 (define_expand "aarch64_ld1x2<VDC:mode>"
5321  [(match_operand:OI 0 "register_operand" "=w")
5322   (match_operand:DI 1 "register_operand" "r")
5323   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324   "TARGET_SIMD"
5326   machine_mode mode = OImode;
5327   rtx mem = gen_rtx_MEM (mode, operands[1]);
5329   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5330   DONE;
;; Builtin expander for vldN_lane: checks the lane index against the
;; element count, builds a BLK MEM of nregs elements, and emits the
;; matching vec_load_lanes*_lane insn.
5334 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5335   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5336         (match_operand:DI 1 "register_operand" "w")
5337         (match_operand:VSTRUCT 2 "register_operand" "0")
5338         (match_operand:SI 3 "immediate_operand" "i")
5339         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5340   "TARGET_SIMD"
5342   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5343   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5344                      * <VSTRUCT:nregs>);
5346   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5347   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5348         operands[0], mem, operands[2], operands[3]));
5349   DONE;
5352 ;; Expanders for builtins to extract vector registers from large
5353 ;; opaque integer modes.
5355 ;; D-register list.
;; Extract D-register number `part' from a structure value: take the
;; 128-bit (VDBL) slice at byte offset part * 16 and narrow it to the
;; 64-bit destination mode.
5357 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5358  [(match_operand:VDC 0 "register_operand" "=w")
5359   (match_operand:VSTRUCT 1 "register_operand" "w")
5360   (match_operand:SI 2 "immediate_operand" "i")]
5361   "TARGET_SIMD"
5363   int part = INTVAL (operands[2]);
5364   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5365   int offset = part * 16;
5367   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5368   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5369   DONE;
5372 ;; Q-register list.
;; Extract Q-register number `part' from a structure value as the subreg at
;; byte offset part * 16.
5374 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5375  [(match_operand:VQ 0 "register_operand" "=w")
5376   (match_operand:VSTRUCT 1 "register_operand" "w")
5377   (match_operand:SI 2 "immediate_operand" "i")]
5378   "TARGET_SIMD"
5380   int part = INTVAL (operands[2]);
5381   int offset = part * 16;
5383   emit_move_insn (operands[0],
5384                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5385   DONE;
5388 ;; Permuted-store expanders for neon intrinsics.
5390 ;; Permute instructions
5392 ;; vec_perm support
;; Standard vec_perm pattern (byte vectors only): delegated entirely to
;; aarch64_expand_vec_perm.
5394 (define_expand "vec_perm<mode>"
5395   [(match_operand:VB 0 "register_operand")
5396    (match_operand:VB 1 "register_operand")
5397    (match_operand:VB 2 "register_operand")
5398    (match_operand:VB 3 "register_operand")]
5399   "TARGET_SIMD"
5401   aarch64_expand_vec_perm (operands[0], operands[1],
5402                            operands[2], operands[3], <nunits>);
5403   DONE;
;; Single-table TBL: index vector 2 selects bytes from the one 16-byte
;; table in operand 1.
5406 (define_insn "aarch64_tbl1<mode>"
5407   [(set (match_operand:VB 0 "register_operand" "=w")
5408         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5409                     (match_operand:VB 2 "register_operand" "w")]
5410                    UNSPEC_TBL))]
5411   "TARGET_SIMD"
5412   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5413   [(set_attr "type" "neon_tbl1<q>")]
5416 ;; Two source registers.
;; TBL over a two-register table held in an OI tuple.
5418 (define_insn "aarch64_tbl2v16qi"
5419   [(set (match_operand:V16QI 0 "register_operand" "=w")
5420         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5421                        (match_operand:V16QI 2 "register_operand" "w")]
5422                       UNSPEC_TBL))]
5423   "TARGET_SIMD"
5424   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5425   [(set_attr "type" "neon_tbl2_q")]
;; TBL over a two-register (OI) table with a VB-mode index/result.
5428 (define_insn "aarch64_tbl3<mode>"
5429   [(set (match_operand:VB 0 "register_operand" "=w")
5430         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5431                       (match_operand:VB 2 "register_operand" "w")]
5432                       UNSPEC_TBL))]
5433   "TARGET_SIMD"
5434   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5435   [(set_attr "type" "neon_tbl3")]
;; TBX over a two-register (OI) table: out-of-range indices leave the
;; corresponding bytes of the tied destination (operand 1, "0") unchanged.
5438 (define_insn "aarch64_tbx4<mode>"
5439   [(set (match_operand:VB 0 "register_operand" "=w")
5440         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5441                       (match_operand:OI 2 "register_operand" "w")
5442                       (match_operand:VB 3 "register_operand" "w")]
5443                       UNSPEC_TBX))]
5444   "TARGET_SIMD"
5445   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5446   [(set_attr "type" "neon_tbl4")]
5449 ;; Three source registers.
;; TBL over a three-register (CI) table.
5451 (define_insn "aarch64_qtbl3<mode>"
5452   [(set (match_operand:VB 0 "register_operand" "=w")
5453         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5454                       (match_operand:VB 2 "register_operand" "w")]
5455                       UNSPEC_TBL))]
5456   "TARGET_SIMD"
5457   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5458   [(set_attr "type" "neon_tbl3")]
;; TBX over a three-register (CI) table; destination tied to operand 1.
5461 (define_insn "aarch64_qtbx3<mode>"
5462   [(set (match_operand:VB 0 "register_operand" "=w")
5463         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5464                       (match_operand:CI 2 "register_operand" "w")
5465                       (match_operand:VB 3 "register_operand" "w")]
5466                       UNSPEC_TBX))]
5467   "TARGET_SIMD"
5468   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5469   [(set_attr "type" "neon_tbl3")]
5472 ;; Four source registers.
;; TBL over a four-register (XI) table.
5474 (define_insn "aarch64_qtbl4<mode>"
5475   [(set (match_operand:VB 0 "register_operand" "=w")
5476         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5477                       (match_operand:VB 2 "register_operand" "w")]
5478                       UNSPEC_TBL))]
5479   "TARGET_SIMD"
5480   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5481   [(set_attr "type" "neon_tbl4")]
;; TBX over a four-register (XI) table; destination tied to operand 1.
5484 (define_insn "aarch64_qtbx4<mode>"
5485   [(set (match_operand:VB 0 "register_operand" "=w")
5486         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5487                       (match_operand:XI 2 "register_operand" "w")
5488                       (match_operand:VB 3 "register_operand" "w")]
5489                       UNSPEC_TBX))]
5490   "TARGET_SIMD"
5491   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5492   [(set_attr "type" "neon_tbl4")]
;; Concatenate two V16QI registers into an OI pair; kept as one insn until
;; after reload, then split by aarch64_split_combinev16qi into plain moves.
5495 (define_insn_and_split "aarch64_combinev16qi"
5496   [(set (match_operand:OI 0 "register_operand" "=w")
5497         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5498                     (match_operand:V16QI 2 "register_operand" "w")]
5499                    UNSPEC_CONCAT))]
5500   "TARGET_SIMD"
5501   "#"
5502   "&& reload_completed"
5503   [(const_int 0)]
5505   aarch64_split_combinev16qi (operands);
5506   DONE;
5508 [(set_attr "type" "multiple")]
5511 ;; This instruction's pattern is generated directly by
5512 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5513 ;; need corresponding changes there.
;; Two-operand permutes: TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2 selected by the
;; PERMUTE iterator.
5514 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5515   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5516         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5517                           (match_operand:VALL_F16 2 "register_operand" "w")]
5518          PERMUTE))]
5519   "TARGET_SIMD"
5520   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5521   [(set_attr "type" "neon_permute<q>")]
5524 ;; This instruction's pattern is generated directly by
5525 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5526 ;; need corresponding changes there.  Note that the immediate (third)
5527 ;; operand is a lane index not a byte index.
;; EXT: extract a vector spanning operands 1 and 2 starting at the given
;; lane; the lane index is scaled to a byte offset at output time.
5528 (define_insn "aarch64_ext<mode>"
5529   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5530         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5531                           (match_operand:VALL_F16 2 "register_operand" "w")
5532                           (match_operand:SI 3 "immediate_operand" "i")]
5533          UNSPEC_EXT))]
5534   "TARGET_SIMD"
5536   operands[3] = GEN_INT (INTVAL (operands[3])
5537       * GET_MODE_UNIT_SIZE (<MODE>mode));
5538   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5540   [(set_attr "type" "neon_ext<q>")]
5543 ;; This instruction's pattern is generated directly by
5544 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5545 ;; need corresponding changes there.
;; REV16/REV32/REV64 element reversal, selected by the REVERSE iterator.
5546 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5547   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5548         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5549                     REVERSE))]
5550   "TARGET_SIMD"
5551   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5552   [(set_attr "type" "neon_rev<q>")]
;; ST2 from D registers for 64-bit vector modes (VD).
5555 (define_insn "aarch64_st2<mode>_dreg"
5556   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5557         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5558                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5559                    UNSPEC_ST2))]
5560   "TARGET_SIMD"
5561   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5562   [(set_attr "type" "neon_store2_2reg")]
;; Single-element (DX) variant uses ST1 of two 1d registers.
5565 (define_insn "aarch64_st2<mode>_dreg"
5566   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5567         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5568                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569                    UNSPEC_ST2))]
5570   "TARGET_SIMD"
5571   "st1\\t{%S1.1d - %T1.1d}, %0"
5572   [(set_attr "type" "neon_store1_2reg")]
;; ST3 from D registers for 64-bit vector modes (VD).
5575 (define_insn "aarch64_st3<mode>_dreg"
5576   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5577         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5578                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5579                    UNSPEC_ST3))]
5580   "TARGET_SIMD"
5581   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5582   [(set_attr "type" "neon_store3_3reg")]
;; Single-element (DX) variant uses ST1 of three 1d registers.
5585 (define_insn "aarch64_st3<mode>_dreg"
5586   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5587         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5588                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5589                    UNSPEC_ST3))]
5590   "TARGET_SIMD"
5591   "st1\\t{%S1.1d - %U1.1d}, %0"
5592   [(set_attr "type" "neon_store1_3reg")]
;; ST4 from D registers for 64-bit vector modes (VD).
5595 (define_insn "aarch64_st4<mode>_dreg"
5596   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5597         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5598                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5599                    UNSPEC_ST4))]
5600   "TARGET_SIMD"
5601   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5602   [(set_attr "type" "neon_store4_4reg")]
;; Single-element (DX) variant uses ST1 of four 1d registers.
5605 (define_insn "aarch64_st4<mode>_dreg"
5606   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5607         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5608                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5609                    UNSPEC_ST4))]
5610   "TARGET_SIMD"
5611   "st1\\t{%S1.1d - %V1.1d}, %0"
5612   [(set_attr "type" "neon_store1_4reg")]
;; Builtin expander for vstN on 64-bit vectors: BLK MEM of nregs * 8 bytes,
;; delegated to the corresponding _dreg store insn.
5615 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5616  [(match_operand:DI 0 "register_operand" "r")
5617   (match_operand:VSTRUCT 1 "register_operand" "w")
5618   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5619   "TARGET_SIMD"
5621   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5622   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5624   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5625   DONE;
;; Builtin expander for vstN on 128-bit vectors, delegated to
;; aarch64_simd_stN.
5628 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5629  [(match_operand:DI 0 "register_operand" "r")
5630   (match_operand:VSTRUCT 1 "register_operand" "w")
5631   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5632   "TARGET_SIMD"
5634   machine_mode mode = <VSTRUCT:MODE>mode;
5635   rtx mem = gen_rtx_MEM (mode, operands[0]);
5637   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5638   DONE;
;; Builtin expander for vstN_lane: BLK MEM of nregs elements, delegated to
;; the matching vec_store_lanes*_lane insn (which endian-adjusts the lane).
5641 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5642  [(match_operand:DI 0 "register_operand" "r")
5643   (match_operand:VSTRUCT 1 "register_operand" "w")
5644   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5645   (match_operand:SI 2 "immediate_operand")]
5646   "TARGET_SIMD"
5648   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5649   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5650                      * <VSTRUCT:nregs>);
5652   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5653                 mem, operands[1], operands[2]));
5654   DONE;
;; Builtin expander for vst1: a plain vector move on little-endian, but the
;; element-ordered aarch64_be_st1 on big-endian.
5657 (define_expand "aarch64_st1<VALL_F16:mode>"
5658  [(match_operand:DI 0 "register_operand")
5659   (match_operand:VALL_F16 1 "register_operand")]
5660   "TARGET_SIMD"
5662   machine_mode mode = <VALL_F16:MODE>mode;
5663   rtx mem = gen_rtx_MEM (mode, operands[0]);
5665   if (BYTES_BIG_ENDIAN)
5666     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5667   else
5668     emit_move_insn (mem, operands[1]);
5669   DONE;
5672 ;; Expander for builtins to insert vector registers into large
5673 ;; opaque integer modes.
5675 ;; Q-register list.  We don't need a D-reg inserter as we zero
5676 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole structure (operand 1) into operand 0, then overwrite the
;; 128-bit slice at byte offset part * 16 with operand 2.
5678 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5679  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5680   (match_operand:VSTRUCT 1 "register_operand" "0")
5681   (match_operand:VQ 2 "register_operand" "w")
5682   (match_operand:SI 3 "immediate_operand" "i")]
5683   "TARGET_SIMD"
5685   int part = INTVAL (operands[3]);
5686   int offset = part * 16;
5688   emit_move_insn (operands[0], operands[1]);
5689   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5690                   operands[2]);
5691   DONE;
5694 ;; Standard pattern name vec_init<mode><Vel>.
5696 (define_expand "vec_init<mode><Vel>"
5697   [(match_operand:VALL_F16 0 "register_operand" "")
5698    (match_operand 1 "" "")]
5699   "TARGET_SIMD"
5701   aarch64_expand_vector_init (operands[0], operands[1]);
5702   DONE;
;; Load a single element from memory and replicate it into every lane
;; of the destination (LD1R).
5705 (define_insn "*aarch64_simd_ld1r<mode>"
5706   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5707         (vec_duplicate:VALL_F16
5708           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5709   "TARGET_SIMD"
5710   "ld1r\\t{%0.<Vtype>}, %1"
5711   [(set_attr "type" "neon_load1_all_lanes")]
;; LD1 of two consecutive vector registers into an OImode value.
;; The dummy unspec supplies the element mode (Q-register variant);
;; %S0/%T0 print the first and second register of the pair.
5714 (define_insn "aarch64_simd_ld1<mode>_x2"
5715   [(set (match_operand:OI 0 "register_operand" "=w")
5716         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5717                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5718                    UNSPEC_LD1))]
5719   "TARGET_SIMD"
5720   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5721   [(set_attr "type" "neon_load1_2reg<q>")]
;; As above, but iterating over the D-register modes (VDC); the
;; generated pattern names are distinct because <mode> differs.
5724 (define_insn "aarch64_simd_ld1<mode>_x2"
5725   [(set (match_operand:OI 0 "register_operand" "=w")
5726         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5727                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5728                    UNSPEC_LD1))]
5729   "TARGET_SIMD"
5730   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5731   [(set_attr "type" "neon_load1_2reg<q>")]
;; Floating-point reciprocal estimate, vector forms (FRECPE).
5735 (define_insn "aarch64_frecpe<mode>"
5736   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5737         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5738          UNSPEC_FRECPE))]
5739   "TARGET_SIMD"
5740   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5741   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar FRECPE/FRECPX, selected by the FRECP iterator's suffix.
5744 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5745   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5746         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5747          FRECP))]
5748   "TARGET_SIMD"
5749   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5750   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; Floating-point reciprocal step (FRECPS), vector and scalar forms.
5753 (define_insn "aarch64_frecps<mode>"
5754   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5755         (unspec:VHSDF_HSDF
5756           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5757           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5758           UNSPEC_FRECPS))]
5759   "TARGET_SIMD"
5760   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5761   [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate (URECPE) on 32-bit lanes.
5764 (define_insn "aarch64_urecpe<mode>"
5765   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5766         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5767                 UNSPEC_URECPE))]
5768  "TARGET_SIMD"
5769  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5770   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5772 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extract the element at immediate index operand 2 from vector
;; operand 1; simply delegates to the aarch64_get_lane insn.
5774 (define_expand "vec_extract<mode><Vel>"
5775   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5776    (match_operand:VALL_F16 1 "register_operand" "")
5777    (match_operand:SI 2 "immediate_operand" "")]
5778   "TARGET_SIMD"
5780     emit_insn
5781       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5782     DONE;
5785 ;; aes
;; One AES round: AESE (encrypt) or AESD (decrypt), selected by the
;; CRYPTO_AES iterator.  The state register (operand 1) is tied to the
;; destination; operand 2 holds the round key.
5787 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5788   [(set (match_operand:V16QI 0 "register_operand" "=w")
5789         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5790                        (match_operand:V16QI 2 "register_operand" "w")]
5791          CRYPTO_AES))]
5792   "TARGET_SIMD && TARGET_AES"
5793   "aes<aes_op>\\t%0.16b, %2.16b"
5794   [(set_attr "type" "crypto_aese")]
5797 ;; When AES/AESMC fusion is enabled we want the register allocation to
5798 ;; look like:
5799 ;;    AESE Vn, _
5800 ;;    AESMC Vn, Vn
5801 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; AESMC/AESIMC (mix-columns) insn.  The first alternative ties input
;; to output and is enabled only when AESE/AESMC fusion is active, so
;; the allocator prefers the fusible register pattern; the second,
;; untied alternative is always available.
5803 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5804   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5805         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5806          CRYPTO_AESMC))]
5807   "TARGET_SIMD && TARGET_AES"
5808   "aes<aesmc_op>\\t%0.16b, %1.16b"
5809   [(set_attr "type" "crypto_aesmc")
5810    (set_attr_alternative "enabled"
5811      [(if_then_else (match_test
5812                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5813                      (const_string "yes" )
5814                      (const_string "no"))
5815       (const_string "yes")])]
5818 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5819 ;; and enforce the register dependency without scheduling or register
5820 ;; allocation messing up the order or introducing moves in between.
5821 ;;  Mash the two together during combine.
;; Combined AESE+AESMC pattern emitted by combine.  The earlyclobber
;; ("=&w") keeps the round-key register (operand 2) distinct from the
;; output so the back-to-back pair stays fusible.
5823 (define_insn "*aarch64_crypto_aese_fused"
5824   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5825         (unspec:V16QI
5826           [(unspec:V16QI
5827             [(match_operand:V16QI 1 "register_operand" "0")
5828              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5829           ] UNSPEC_AESMC))]
5830   "TARGET_SIMD && TARGET_AES
5831    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5832   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5833   [(set_attr "type" "crypto_aese")
5834    (set_attr "length" "8")]
5837 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5838 ;; and enforce the register dependency without scheduling or register
5839 ;; allocation messing up the order or introducing moves in between.
5840 ;;  Mash the two together during combine.
;; Decrypt analogue of *aarch64_crypto_aese_fused: AESD+AESIMC as a
;; single two-instruction pattern, with the same earlyclobber trick.
5842 (define_insn "*aarch64_crypto_aesd_fused"
5843   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5844         (unspec:V16QI
5845           [(unspec:V16QI
5846             [(match_operand:V16QI 1 "register_operand" "0")
5847              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5848           ] UNSPEC_AESIMC))]
5849   "TARGET_SIMD && TARGET_AES
5850    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5851   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5852   [(set_attr "type" "crypto_aese")
5853    (set_attr "length" "8")]
5856 ;; sha1
;; SHA1H on a scalar SI value held in a vector register.
5858 (define_insn "aarch64_crypto_sha1hsi"
5859   [(set (match_operand:SI 0 "register_operand" "=w")
5860         (unspec:SI [(match_operand:SI 1
5861                        "register_operand" "w")]
5862          UNSPEC_SHA1H))]
5863   "TARGET_SIMD && TARGET_SHA2"
5864   "sha1h\\t%s0, %s1"
5865   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H taking its input from lane 0 of a V4SI register
;; (little-endian lane numbering, hence !BYTES_BIG_ENDIAN).
5868 (define_insn "aarch64_crypto_sha1hv4si"
5869   [(set (match_operand:SI 0 "register_operand" "=w")
5870         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5871                      (parallel [(const_int 0)]))]
5872          UNSPEC_SHA1H))]
5873   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5874   "sha1h\\t%s0, %s1"
5875   [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian version: the architectural lane 0 is RTL lane 3.
5878 (define_insn "aarch64_be_crypto_sha1hv4si"
5879   [(set (match_operand:SI 0 "register_operand" "=w")
5880         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5881                      (parallel [(const_int 3)]))]
5882          UNSPEC_SHA1H))]
5883   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5884   "sha1h\\t%s0, %s1"
5885   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1 schedule update; operand 1 is tied to the destination.
5888 (define_insn "aarch64_crypto_sha1su1v4si"
5889   [(set (match_operand:V4SI 0 "register_operand" "=w")
5890         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5891                       (match_operand:V4SI 2 "register_operand" "w")]
5892          UNSPEC_SHA1SU1))]
5893   "TARGET_SIMD && TARGET_SHA2"
5894   "sha1su1\\t%0.4s, %2.4s"
5895   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P hash update, selected by CRYPTO_SHA1.
5898 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5899   [(set (match_operand:V4SI 0 "register_operand" "=w")
5900         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5901                       (match_operand:SI 2 "register_operand" "w")
5902                       (match_operand:V4SI 3 "register_operand" "w")]
5903          CRYPTO_SHA1))]
5904   "TARGET_SIMD && TARGET_SHA2"
5905   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5906   [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0 schedule update.
5909 (define_insn "aarch64_crypto_sha1su0v4si"
5910   [(set (match_operand:V4SI 0 "register_operand" "=w")
5911         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5912                       (match_operand:V4SI 2 "register_operand" "w")
5913                       (match_operand:V4SI 3 "register_operand" "w")]
5914          UNSPEC_SHA1SU0))]
5915   "TARGET_SIMD && TARGET_SHA2"
5916   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5917   [(set_attr "type" "crypto_sha1_xor")]
5920 ;; sha256
;; SHA256H/SHA256H2 hash update, selected by CRYPTO_SHA256; the
;; running hash (operand 1) is tied to the destination.
5922 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5923   [(set (match_operand:V4SI 0 "register_operand" "=w")
5924         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5925                       (match_operand:V4SI 2 "register_operand" "w")
5926                       (match_operand:V4SI 3 "register_operand" "w")]
5927          CRYPTO_SHA256))]
5928   "TARGET_SIMD && TARGET_SHA2"
5929   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5930   [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0 message-schedule update.
5933 (define_insn "aarch64_crypto_sha256su0v4si"
5934   [(set (match_operand:V4SI 0 "register_operand" "=w")
5935         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5936                       (match_operand:V4SI 2 "register_operand" "w")]
5937          UNSPEC_SHA256SU0))]
5938   "TARGET_SIMD && TARGET_SHA2"
5939   "sha256su0\\t%0.4s, %2.4s"
5940   [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1 message-schedule update.
5943 (define_insn "aarch64_crypto_sha256su1v4si"
5944   [(set (match_operand:V4SI 0 "register_operand" "=w")
5945         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5946                       (match_operand:V4SI 2 "register_operand" "w")
5947                       (match_operand:V4SI 3 "register_operand" "w")]
5948          UNSPEC_SHA256SU1))]
5949   "TARGET_SIMD && TARGET_SHA2"
5950   "sha256su1\\t%0.4s, %2.4s, %3.4s"
5951   [(set_attr "type" "crypto_sha256_slow")]
5954 ;; sha512
;; SHA512H/SHA512H2 hash update (part of the SHA3 extension, hence
;; TARGET_SHA3); the running hash (operand 1) is tied to the output.
5956 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
5957   [(set (match_operand:V2DI 0 "register_operand" "=w")
5958         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5959                       (match_operand:V2DI 2 "register_operand" "w")
5960                       (match_operand:V2DI 3 "register_operand" "w")]
5961          CRYPTO_SHA512))]
5962   "TARGET_SIMD && TARGET_SHA3"
5963   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
5964   [(set_attr "type" "crypto_sha512")]
;; SHA512SU0 message-schedule update.
5967 (define_insn "aarch64_crypto_sha512su0qv2di"
5968   [(set (match_operand:V2DI 0 "register_operand" "=w")
5969         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5970                       (match_operand:V2DI 2 "register_operand" "w")]
5971          UNSPEC_SHA512SU0))]
5972   "TARGET_SIMD && TARGET_SHA3"
5973   "sha512su0\\t%0.2d, %2.2d"
5974   [(set_attr "type" "crypto_sha512")]
;; SHA512SU1 message-schedule update.
5977 (define_insn "aarch64_crypto_sha512su1qv2di"
5978   [(set (match_operand:V2DI 0 "register_operand" "=w")
5979         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5980                       (match_operand:V2DI 2 "register_operand" "w")
5981                       (match_operand:V2DI 3 "register_operand" "w")]
5982          UNSPEC_SHA512SU1))]
5983   "TARGET_SIMD && TARGET_SHA3"
5984   "sha512su1\\t%0.2d, %2.2d, %3.2d"
5985   [(set_attr "type" "crypto_sha512")]
5988 ;; sha3
;; EOR3: three-way exclusive OR, expressed with generic XOR rtl so
;; combine can form it from plain xor pairs.
5990 (define_insn "eor3q<mode>4"
5991   [(set (match_operand:VQ_I 0 "register_operand" "=w")
5992         (xor:VQ_I
5993          (xor:VQ_I
5994           (match_operand:VQ_I 2 "register_operand" "w")
5995           (match_operand:VQ_I 3 "register_operand" "w"))
5996          (match_operand:VQ_I 1 "register_operand" "w")))]
5997   "TARGET_SIMD && TARGET_SHA3"
5998   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
5999   [(set_attr "type" "crypto_sha3")]
;; RAX1: rotate each 64-bit lane of operand 2 left by one, then XOR
;; with operand 1.
6002 (define_insn "aarch64_rax1qv2di"
6003   [(set (match_operand:V2DI 0 "register_operand" "=w")
6004         (xor:V2DI
6005          (rotate:V2DI
6006           (match_operand:V2DI 2 "register_operand" "w")
6007           (const_int 1))
6008          (match_operand:V2DI 1 "register_operand" "w")))]
6009   "TARGET_SIMD && TARGET_SHA3"
6010   "rax1\\t%0.2d, %1.2d, %2.2d"
6011   [(set_attr "type" "crypto_sha3")]
;; XAR: XOR the two inputs (commutative, "%w"), then rotate each
;; 64-bit lane right by the immediate in operand 3.
6014 (define_insn "aarch64_xarqv2di"
6015   [(set (match_operand:V2DI 0 "register_operand" "=w")
6016         (rotatert:V2DI
6017          (xor:V2DI
6018           (match_operand:V2DI 1 "register_operand" "%w")
6019           (match_operand:V2DI 2 "register_operand" "w"))
6020          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6021   "TARGET_SIMD && TARGET_SHA3"
6022   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6023   [(set_attr "type" "crypto_sha3")]
;; BCAX: bit clear and XOR -- operand 1 XOR (operand 2 AND NOT
;; operand 3), expressed with generic rtl for combine.
6026 (define_insn "bcaxq<mode>4"
6027   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6028         (xor:VQ_I
6029          (and:VQ_I
6030           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6031           (match_operand:VQ_I 2 "register_operand" "w"))
6032          (match_operand:VQ_I 1 "register_operand" "w")))]
6033   "TARGET_SIMD && TARGET_SHA3"
6034   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6035   [(set_attr "type" "crypto_sha3")]
6038 ;; SM3
;; SM3SS1: rotating XOR of three inputs (SM3 hash extension).
6040 (define_insn "aarch64_sm3ss1qv4si"
6041   [(set (match_operand:V4SI 0 "register_operand" "=w")
6042         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6043                       (match_operand:V4SI 2 "register_operand" "w")
6044                       (match_operand:V4SI 3 "register_operand" "w")]
6045          UNSPEC_SM3SS1))]
6046   "TARGET_SIMD && TARGET_SM4"
6047   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6048   [(set_attr "type" "crypto_sm3")]
;; SM3TT1A/TT1B/TT2A/TT2B, selected by CRYPTO_SM3TT; operand 4 is the
;; 2-bit lane index and operand 1 is tied to the destination.
6052 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6053   [(set (match_operand:V4SI 0 "register_operand" "=w")
6054         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6055                       (match_operand:V4SI 2 "register_operand" "w")
6056                       (match_operand:V4SI 3 "register_operand" "w")
6057                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6058          CRYPTO_SM3TT))]
6059   "TARGET_SIMD && TARGET_SM4"
6060   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6061   [(set_attr "type" "crypto_sm3")]
;; SM3PARTW1/SM3PARTW2 message-schedule updates.
6064 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6065   [(set (match_operand:V4SI 0 "register_operand" "=w")
6066         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6067                       (match_operand:V4SI 2 "register_operand" "w")
6068                       (match_operand:V4SI 3 "register_operand" "w")]
6069          CRYPTO_SM3PART))]
6070   "TARGET_SIMD && TARGET_SM4"
6071   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6072   [(set_attr "type" "crypto_sm3")]
6075 ;; SM4
;; SM4E: one round of SM4 encryption; state (operand 1) is tied to the
;; destination, operand 2 holds the round keys.
6077 (define_insn "aarch64_sm4eqv4si"
6078   [(set (match_operand:V4SI 0 "register_operand" "=w")
6079         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6080                       (match_operand:V4SI 2 "register_operand" "w")]
6081          UNSPEC_SM4E))]
6082   "TARGET_SIMD && TARGET_SM4"
6083   "sm4e\\t%0.4s, %2.4s"
6084   [(set_attr "type" "crypto_sm4")]
;; SM4EKEY: key-schedule derivation (no tied operand).
6087 (define_insn "aarch64_sm4ekeyqv4si"
6088   [(set (match_operand:V4SI 0 "register_operand" "=w")
6089         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6090                       (match_operand:V4SI 2 "register_operand" "w")]
6091          UNSPEC_SM4EKEY))]
6092   "TARGET_SIMD && TARGET_SM4"
6093   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6094   [(set_attr "type" "crypto_sm4")]
6097 ;; fp16fml
;; FP16 fused multiply-accumulate long (FMLAL/FMLSL), low half: build
;; parallel rtxes selecting the low half of each half-precision input
;; and hand off to the aarch64_simd_fml*_low insn.
6099 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6100   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6101         (unspec:VDQSF
6102          [(match_operand:VDQSF 1 "register_operand" "0")
6103           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6104           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6105          VFMLA16_LOW))]
6106   "TARGET_F16FML"
6108   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6109                                             <nunits> * 2, false);
6110   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6111                                             <nunits> * 2, false);
6113   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6114                                                                 operands[1],
6115                                                                 operands[2],
6116                                                                 operands[3],
6117                                                                 p1, p2));
6118   DONE;
;; As above but selecting the high halves (FMLAL2/FMLSL2).
6122 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6123   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6124         (unspec:VDQSF
6125          [(match_operand:VDQSF 1 "register_operand" "0")
6126           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6127           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6128          VFMLA16_HIGH))]
6129   "TARGET_F16FML"
6131   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6132   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6134   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6135                                                                  operands[1],
6136                                                                  operands[2],
6137                                                                  operands[3],
6138                                                                  p1, p2));
6139   DONE;
;; FMLAL (low): widen the low half-precision halves of operands 2 and
;; 3 to single precision, multiply, and accumulate into operand 1
;; (tied to the destination).  Expressed as a genuine fma of
;; float_extends so the rtl matches the instruction semantics.
6142 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6143   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6144         (fma:VDQSF
6145          (float_extend:VDQSF
6146           (vec_select:<VFMLA_SEL_W>
6147            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6148            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6149          (float_extend:VDQSF
6150           (vec_select:<VFMLA_SEL_W>
6151            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6152            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6153          (match_operand:VDQSF 1 "register_operand" "0")))]
6154   "TARGET_F16FML"
6155   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6156   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (low): as FMLAL low but the first multiplicand is negated
;; before widening, giving a multiply-subtract.
6159 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6160   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6161         (fma:VDQSF
6162          (float_extend:VDQSF
6163           (neg:<VFMLA_SEL_W>
6164            (vec_select:<VFMLA_SEL_W>
6165             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6166             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6167          (float_extend:VDQSF
6168           (vec_select:<VFMLA_SEL_W>
6169            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6170            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6171          (match_operand:VDQSF 1 "register_operand" "0")))]
6172   "TARGET_F16FML"
6173   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6174   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (high): same as the low form but selecting the high halves.
6177 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6178   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6179         (fma:VDQSF
6180          (float_extend:VDQSF
6181           (vec_select:<VFMLA_SEL_W>
6182            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6183            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6184          (float_extend:VDQSF
6185           (vec_select:<VFMLA_SEL_W>
6186            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6187            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6188          (match_operand:VDQSF 1 "register_operand" "0")))]
6189   "TARGET_F16FML"
6190   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6191   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (high): high-half multiply-subtract.
6194 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6195   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6196         (fma:VDQSF
6197          (float_extend:VDQSF
6198           (neg:<VFMLA_SEL_W>
6199            (vec_select:<VFMLA_SEL_W>
6200             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6201             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6202          (float_extend:VDQSF
6203           (vec_select:<VFMLA_SEL_W>
6204            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6205            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6206          (match_operand:VDQSF 1 "register_operand" "0")))]
6207   "TARGET_F16FML"
6208   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6209   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL/FMLSL, 64-bit form, low half: operand 4 selects the
;; lane of operand 3; the index is endian-adjusted before being passed
;; to the underlying insn.
6212 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6213   [(set (match_operand:V2SF 0 "register_operand" "")
6214         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6215                            (match_operand:V4HF 2 "register_operand" "")
6216                            (match_operand:V4HF 3 "register_operand" "")
6217                            (match_operand:SI 4 "aarch64_imm2" "")]
6218          VFMLA16_LOW))]
6219   "TARGET_F16FML"
6221     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6222     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6224     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6225                                                             operands[1],
6226                                                             operands[2],
6227                                                             operands[3],
6228                                                             p1, lane));
6229     DONE;
;; As above, high half (FMLAL2/FMLSL2 by lane).
6233 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6234   [(set (match_operand:V2SF 0 "register_operand" "")
6235         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6236                            (match_operand:V4HF 2 "register_operand" "")
6237                            (match_operand:V4HF 3 "register_operand" "")
6238                            (match_operand:SI 4 "aarch64_imm2" "")]
6239          VFMLA16_HIGH))]
6240   "TARGET_F16FML"
6242     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6243     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6245     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6246                                                              operands[1],
6247                                                              operands[2],
6248                                                              operands[3],
6249                                                              p1, lane));
6250     DONE;
;; FMLAL by lane, 64-bit form, low half: half of operand 2 times a
;; single lane of operand 3 broadcast to all lanes, accumulated into
;; operand 1.  The "x" constraint on the indexed operand restricts it
;; to the subset of vector registers the lane-indexed encoding allows.
6253 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6254   [(set (match_operand:V2SF 0 "register_operand" "=w")
6255         (fma:V2SF
6256          (float_extend:V2SF
6257            (vec_select:V2HF
6258             (match_operand:V4HF 2 "register_operand" "w")
6259             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6260          (float_extend:V2SF
6261            (vec_duplicate:V2HF
6262             (vec_select:HF
6263              (match_operand:V4HF 3 "register_operand" "x")
6264              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6265          (match_operand:V2SF 1 "register_operand" "0")))]
6266   "TARGET_F16FML"
6267   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6268   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL by lane, low half: first multiplicand negated before widening.
6271 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6272   [(set (match_operand:V2SF 0 "register_operand" "=w")
6273         (fma:V2SF
6274          (float_extend:V2SF
6275           (neg:V2HF
6276            (vec_select:V2HF
6277             (match_operand:V4HF 2 "register_operand" "w")
6278             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6279          (float_extend:V2SF
6280           (vec_duplicate:V2HF
6281            (vec_select:HF
6282             (match_operand:V4HF 3 "register_operand" "x")
6283             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6284          (match_operand:V2SF 1 "register_operand" "0")))]
6285   "TARGET_F16FML"
6286   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6287   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 by lane: high half of operand 2.
6290 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6291   [(set (match_operand:V2SF 0 "register_operand" "=w")
6292         (fma:V2SF
6293          (float_extend:V2SF
6294            (vec_select:V2HF
6295             (match_operand:V4HF 2 "register_operand" "w")
6296             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6297          (float_extend:V2SF
6298            (vec_duplicate:V2HF
6299             (vec_select:HF
6300              (match_operand:V4HF 3 "register_operand" "x")
6301              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6302          (match_operand:V2SF 1 "register_operand" "0")))]
6303   "TARGET_F16FML"
6304   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6305   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 by lane: high half, negated first multiplicand.
6308 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6309   [(set (match_operand:V2SF 0 "register_operand" "=w")
6310         (fma:V2SF
6311          (float_extend:V2SF
6312            (neg:V2HF
6313             (vec_select:V2HF
6314              (match_operand:V4HF 2 "register_operand" "w")
6315              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6316          (float_extend:V2SF
6317            (vec_duplicate:V2HF
6318             (vec_select:HF
6319              (match_operand:V4HF 3 "register_operand" "x")
6320              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6321          (match_operand:V2SF 1 "register_operand" "0")))]
6322   "TARGET_F16FML"
6323   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6324   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL/FMLSL, 128-bit form indexing a 128-bit register
;; (laneq): operand 4 is a 3-bit lane index, endian-adjusted here.
6327 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6328   [(set (match_operand:V4SF 0 "register_operand" "")
6329         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6330                            (match_operand:V8HF 2 "register_operand" "")
6331                            (match_operand:V8HF 3 "register_operand" "")
6332                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6333          VFMLA16_LOW))]
6334   "TARGET_F16FML"
6336     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6337     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6339     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6340                                                               operands[1],
6341                                                               operands[2],
6342                                                               operands[3],
6343                                                               p1, lane));
6344     DONE;
;; As above, high half.
6347 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6348   [(set (match_operand:V4SF 0 "register_operand" "")
6349         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6350                            (match_operand:V8HF 2 "register_operand" "")
6351                            (match_operand:V8HF 3 "register_operand" "")
6352                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6353          VFMLA16_HIGH))]
6354   "TARGET_F16FML"
6356     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6357     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6359     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6360                                                                operands[1],
6361                                                                operands[2],
6362                                                                operands[3],
6363                                                                p1, lane));
6364     DONE;
;; FMLAL by laneq, 128-bit form, low half: low half of operand 2 times
;; one lane of 128-bit operand 3 broadcast to all lanes, accumulated
;; into operand 1.  The "x" constraint restricts the indexed register
;; as required by the lane-indexed encoding.
6367 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6368   [(set (match_operand:V4SF 0 "register_operand" "=w")
6369         (fma:V4SF
6370          (float_extend:V4SF
6371           (vec_select:V4HF
6372             (match_operand:V8HF 2 "register_operand" "w")
6373             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6374          (float_extend:V4SF
6375           (vec_duplicate:V4HF
6376            (vec_select:HF
6377             (match_operand:V8HF 3 "register_operand" "x")
6378             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6379          (match_operand:V4SF 1 "register_operand" "0")))]
6380   "TARGET_F16FML"
6381   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6382   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL by laneq, low half: negated first multiplicand.
6385 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6386   [(set (match_operand:V4SF 0 "register_operand" "=w")
6387         (fma:V4SF
6388           (float_extend:V4SF
6389            (neg:V4HF
6390             (vec_select:V4HF
6391              (match_operand:V8HF 2 "register_operand" "w")
6392              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6393          (float_extend:V4SF
6394           (vec_duplicate:V4HF
6395            (vec_select:HF
6396             (match_operand:V8HF 3 "register_operand" "x")
6397             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6398          (match_operand:V4SF 1 "register_operand" "0")))]
6399   "TARGET_F16FML"
6400   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6401   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 by laneq: high half of operand 2.
6404 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6405   [(set (match_operand:V4SF 0 "register_operand" "=w")
6406         (fma:V4SF
6407          (float_extend:V4SF
6408           (vec_select:V4HF
6409             (match_operand:V8HF 2 "register_operand" "w")
6410             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6411          (float_extend:V4SF
6412           (vec_duplicate:V4HF
6413            (vec_select:HF
6414             (match_operand:V8HF 3 "register_operand" "x")
6415             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6416          (match_operand:V4SF 1 "register_operand" "0")))]
6417   "TARGET_F16FML"
6418   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6419   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 by laneq: high half, negated first multiplicand.
6422 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6423   [(set (match_operand:V4SF 0 "register_operand" "=w")
6424         (fma:V4SF
6425          (float_extend:V4SF
6426           (neg:V4HF
6427            (vec_select:V4HF
6428             (match_operand:V8HF 2 "register_operand" "w")
6429             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6430          (float_extend:V4SF
6431           (vec_duplicate:V4HF
6432            (vec_select:HF
6433             (match_operand:V8HF 3 "register_operand" "x")
6434             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6435          (match_operand:V4SF 1 "register_operand" "0")))]
6436   "TARGET_F16FML"
6437   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6438   [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the low-half fmlal/fmlsl laneq forms (f16mac1 selects
;; which via the VFMLA16_LOW iterator).  Builds the lo-half selector
;; parallel and the endianness-corrected lane index, then emits the
;; matching concrete insn pattern.
6441 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6442   [(set (match_operand:V2SF 0 "register_operand" "")
6443         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6444                       (match_operand:V4HF 2 "register_operand" "")
6445                       (match_operand:V8HF 3 "register_operand" "")
6446                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6447          VFMLA16_LOW))]
6448   "TARGET_F16FML"
6450     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6451     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6453     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6454                                                              operands[1],
6455                                                              operands[2],
6456                                                              operands[3],
6457                                                              p1, lane));
6458     DONE;
;; Expander for the high-half fmlal2/fmlsl2 laneq forms: identical to
;; the low-half expander except the selector parallel picks the high
;; half (third argument true).
6462 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6463   [(set (match_operand:V2SF 0 "register_operand" "")
6464         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6465                       (match_operand:V4HF 2 "register_operand" "")
6466                       (match_operand:V8HF 3 "register_operand" "")
6467                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6468          VFMLA16_HIGH))]
6469   "TARGET_F16FML"
6471     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6472     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6474     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6475                                                               operands[1],
6476                                                               operands[2],
6477                                                               operands[3],
6478                                                               p1, lane));
6479     DONE;
;; FMLAL (by element, 2S result): widen the low half of the V4HF
;; multiplicand (operand 2) to V2SF, multiply by lane 5 of the V8HF
;; operand 3, accumulate into the V2SF accumulator (operand 1).
6483 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6484   [(set (match_operand:V2SF 0 "register_operand" "=w")
6485         (fma:V2SF
6486          (float_extend:V2SF
6487            (vec_select:V2HF
6488             (match_operand:V4HF 2 "register_operand" "w")
6489             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6490          (float_extend:V2SF
6491           (vec_duplicate:V2HF
6492            (vec_select:HF
6493             (match_operand:V8HF 3 "register_operand" "x")
6494             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6495          (match_operand:V2SF 1 "register_operand" "0")))]
6496   "TARGET_F16FML"
6497   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6498   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, 2S result): as fmlal above but the selected low
;; half of operand 2 is negated before widening (multiply-subtract).
6501 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6502   [(set (match_operand:V2SF 0 "register_operand" "=w")
6503         (fma:V2SF
6504          (float_extend:V2SF
6505           (neg:V2HF
6506            (vec_select:V2HF
6507             (match_operand:V4HF 2 "register_operand" "w")
6508             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6509          (float_extend:V2SF
6510           (vec_duplicate:V2HF
6511            (vec_select:HF
6512             (match_operand:V8HF 3 "register_operand" "x")
6513             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6514          (match_operand:V2SF 1 "register_operand" "0")))]
6515   "TARGET_F16FML"
6516   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6517   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, 2S result): same as the low form but selecting
;; the high half of operand 2 (vect_par_cnst_hi_half), emitting fmlal2.
6520 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6521   [(set (match_operand:V2SF 0 "register_operand" "=w")
6522         (fma:V2SF
6523          (float_extend:V2SF
6524            (vec_select:V2HF
6525             (match_operand:V4HF 2 "register_operand" "w")
6526             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6527          (float_extend:V2SF
6528           (vec_duplicate:V2HF
6529            (vec_select:HF
6530             (match_operand:V8HF 3 "register_operand" "x")
6531             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6532          (match_operand:V2SF 1 "register_operand" "0")))]
6533   "TARGET_F16FML"
6534   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6535   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, 2S result): high-half, negated multiplicand
;; variant — widening multiply-subtract into the V2SF accumulator.
6538 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6539   [(set (match_operand:V2SF 0 "register_operand" "=w")
6540         (fma:V2SF
6541          (float_extend:V2SF
6542           (neg:V2HF
6543            (vec_select:V2HF
6544             (match_operand:V4HF 2 "register_operand" "w")
6545             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6546          (float_extend:V2SF
6547           (vec_duplicate:V2HF
6548            (vec_select:HF
6549             (match_operand:V8HF 3 "register_operand" "x")
6550             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6551          (match_operand:V2SF 1 "register_operand" "0")))]
6552   "TARGET_F16FML"
6553   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6554   [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the quad, low-half lane forms: V8HF multiplicand,
;; V4HF lane operand (index limited to 0-3 by aarch64_imm2).  Builds
;; the lo-half selector and endian-adjusted lane, then emits the
;; concrete insn.
6557 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6558   [(set (match_operand:V4SF 0 "register_operand" "")
6559         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6560                       (match_operand:V8HF 2 "register_operand" "")
6561                       (match_operand:V4HF 3 "register_operand" "")
6562                       (match_operand:SI 4 "aarch64_imm2" "")]
6563          VFMLA16_LOW))]
6564   "TARGET_F16FML"
6566     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6567     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6569     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6570                                                              operands[1],
6571                                                              operands[2],
6572                                                              operands[3],
6573                                                              p1, lane));
6574     DONE;
;; Expander for the quad, high-half lane forms (fmlal2/fmlsl2):
;; identical to the low-half expander above except the selector
;; parallel picks the high half.
6577 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6578   [(set (match_operand:V4SF 0 "register_operand" "")
6579         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6580                       (match_operand:V8HF 2 "register_operand" "")
6581                       (match_operand:V4HF 3 "register_operand" "")
6582                       (match_operand:SI 4 "aarch64_imm2" "")]
6583          VFMLA16_HIGH))]
6584   "TARGET_F16FML"
6586     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6587     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6589     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6590                                                               operands[1],
6591                                                               operands[2],
6592                                                               operands[3],
6593                                                               p1, lane));
6594     DONE;
;; FMLAL (by element, 4S result, D-register lane operand): widen the
;; low half of the V8HF multiplicand, multiply by lane 5 (0-3) of the
;; V4HF operand 3, accumulate into the V4SF accumulator.
6597 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6598   [(set (match_operand:V4SF 0 "register_operand" "=w")
6599         (fma:V4SF
6600          (float_extend:V4SF
6601           (vec_select:V4HF
6602            (match_operand:V8HF 2 "register_operand" "w")
6603            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6604          (float_extend:V4SF
6605           (vec_duplicate:V4HF
6606            (vec_select:HF
6607             (match_operand:V4HF 3 "register_operand" "x")
6608             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6609          (match_operand:V4SF 1 "register_operand" "0")))]
6610   "TARGET_F16FML"
6611   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6612   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, 4S result, D-register lane operand): as the
;; fmlal form above but with the selected half negated before widening.
6615 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6616   [(set (match_operand:V4SF 0 "register_operand" "=w")
6617         (fma:V4SF
6618          (float_extend:V4SF
6619           (neg:V4HF
6620            (vec_select:V4HF
6621             (match_operand:V8HF 2 "register_operand" "w")
6622             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6623          (float_extend:V4SF
6624           (vec_duplicate:V4HF
6625            (vec_select:HF
6626             (match_operand:V4HF 3 "register_operand" "x")
6627             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6628          (match_operand:V4SF 1 "register_operand" "0")))]
6629   "TARGET_F16FML"
6630   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6631   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, 4S result, D-register lane operand): selects the
;; high half of the V8HF multiplicand.
6634 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6635   [(set (match_operand:V4SF 0 "register_operand" "=w")
6636         (fma:V4SF
6637          (float_extend:V4SF
6638           (vec_select:V4HF
6639            (match_operand:V8HF 2 "register_operand" "w")
6640            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6641          (float_extend:V4SF
6642           (vec_duplicate:V4HF
6643            (vec_select:HF
6644             (match_operand:V4HF 3 "register_operand" "x")
6645             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6646          (match_operand:V4SF 1 "register_operand" "0")))]
6647   "TARGET_F16FML"
6648   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6649   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, 4S result, D-register lane operand): high-half,
;; negated-multiplicand variant (widening multiply-subtract).
6652 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6653   [(set (match_operand:V4SF 0 "register_operand" "=w")
6654         (fma:V4SF
6655          (float_extend:V4SF
6656           (neg:V4HF
6657            (vec_select:V4HF
6658             (match_operand:V8HF 2 "register_operand" "w")
6659             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6660          (float_extend:V4SF
6661           (vec_duplicate:V4HF
6662            (vec_select:HF
6663             (match_operand:V4HF 3 "register_operand" "x")
6664             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6665          (match_operand:V4SF 1 "register_operand" "0")))]
6666   "TARGET_F16FML"
6667   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6668   [(set_attr "type" "neon_fp_mul_s")]
6671 ;; pmull
;; Polynomial multiply long: 64x64 -> 128-bit carry-less multiply of
;; two DI sources into a TI result (UNSPEC_PMULL).  Requires both SIMD
;; and the AES crypto extension.
6673 (define_insn "aarch64_crypto_pmulldi"
6674   [(set (match_operand:TI 0 "register_operand" "=w")
6675         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6676                      (match_operand:DI 2 "register_operand" "w")]
6677                     UNSPEC_PMULL))]
6678  "TARGET_SIMD && TARGET_AES"
6679  "pmull\\t%0.1q, %1.1d, %2.1d"
6680   [(set_attr "type" "crypto_pmull")]
;; Polynomial multiply long (second part): pmull2 operates on the
;; upper 64-bit elements of the V2DI sources, producing a TI result
;; (UNSPEC_PMULL2).  Requires both SIMD and the AES crypto extension.
6683 (define_insn "aarch64_crypto_pmullv2di"
6684  [(set (match_operand:TI 0 "register_operand" "=w")
6685        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6686                    (match_operand:V2DI 2 "register_operand" "w")]
6687                   UNSPEC_PMULL2))]
6688   "TARGET_SIMD && TARGET_AES"
6689   "pmull2\\t%0.1q, %1.2d, %2.2d"
6690   [(set_attr "type" "crypto_pmull")]